From 4ea34eed61e8d331e8880ee27cc27b4a56fe3358 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Thu, 21 Jun 2018 17:10:00 -0500 Subject: [PATCH 001/319] Initial teams - my_pe and n_pes --- content/shmem_team_my_pe.tex | 39 ++++++++++++++++++++++++++++++++++++ content/shmem_team_n_pes.tex | 39 ++++++++++++++++++++++++++++++++++++ main_spec.tex | 9 +++++++-- 3 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 content/shmem_team_my_pe.tex create mode 100644 content/shmem_team_n_pes.tex diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex new file mode 100644 index 000000000..531b3f01e --- /dev/null +++ b/content/shmem_team_my_pe.tex @@ -0,0 +1,39 @@ +\apisummary{ + shmem\_team\_my\_pe returns the calling process's virtual rank in the + provided team. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmemx\_team\_my\_pe}@(shmem_team_t newteam); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{IN}{newteam}{A valid SHMEM team handle.} +\end{apiarguments} + +\apidescription{ +The shmemx\_team\_my\_pe function returns the calling process's virtual +rank in the provided team. The rank will be a value between 0 and N-1, +for a team of size N. Different members of a team cannot have the same +rank. For the team SHMEM\_TEAM\_WORLD, this will return shmem\_my\_pe. + +Error checking will be done to ensure a valid team handle is provided. +All errors are considered fatal, and will result in the job aborting +with an informative error message. +} + +\apireturnvalues{ +Calling process's virtual rank in the provided team. +} + +\apinotes{ +By default, SHMEM creates two predefined teams that will be available +for use once the routine start\_pes has been called. These teams can be +referenced in the application by the constants SHMEM\_TEAM\_WORLD and +SHMEM\_TEAM\_NODE. Every PE process is a member of the SHMEM\_TEAM\_WORLD +team, and its rank in SHMEM\_TEAM\_WORLD corresponds to the value of its +global PE rank. 
The SHMEM\_TEAM\_NODE team only contains the set of PEs +that reside on the same node as the current PE. +} diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex new file mode 100644 index 000000000..5736a7aa2 --- /dev/null +++ b/content/shmem_team_n_pes.tex @@ -0,0 +1,39 @@ +\apisummary{ + shmem\_team\_n\_pes returns the total number of \acp{PE} in the provided team. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmemx\_team\_n\_pes}@(shmem_team_t newteam); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{IN}{newteam}{A valid SHMEM team handle.} +\end{apiarguments} + +\apidescription{ +The shmemx\_team\_n\_pes function returns the number of processes in the +team. This will always be a value between 1 and the total number of +\acp{PE}. For the team SHMEM\_TEAM\_WORLD, this will return shmem\_n\_pes. +Every team must have at least one member. All processes in the team +will get back the same value for the team size. + +Error checking will be done to ensure a valid team handle is provided. +All errors are considered fatal and will result in the job aborting +with an informative error message. +} + +\apireturnvalues{ +Total number of \acp{PE} in the provided team. +} + +\apinotes{ +By default, SHMEM creates two predefined teams that will be available +for use once the routine start\_pes has been called. These teams can be +referenced in the application by the constants SHMEM\_TEAM\_WORLD and +SHMEM\_TEAM\_NODE. Every \ac{PE} process is a member of the SHMEM\_TEAM\_WORLD +team, and its rank in SHMEM\_TEAM\_WORLD corresponds to the value of its +global \ac{PE}rank. The SHMEM\_TEAM\_NODE team only contains the set of \acp{PE} +that reside on the same node as the current PE. 
+} diff --git a/main_spec.tex b/main_spec.tex index 6c2c46596..1f5949c60 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -269,8 +269,15 @@ \subsubsection{\textbf{SHMEM\_ALLTOALLS}}\label{subsec:shmem_alltoalls} \input{content/shmem_alltoalls.tex} +\color{Green} +\subsection{Teams or PE Subsets}\label{subsec:team} +\subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} +\input{content/shmem_team_my_pe.tex} +\subsubsection{\textbf{SHMEM\_TEAM\_N\_PES}}\label{subsec:shmem_team_n_pes} +\input{content/shmem_team_n_pes.tex} +\color{Black} \subsection{Point-To-Point Synchronization Routines}\label{subsec:p2p_intro} \input{content/p2p_sync_intro.tex} @@ -278,8 +285,6 @@ \subsection{Point-To-Point Synchronization Routines}\label{subsec:p2p_intro} \subsubsection{\textbf{SHMEM\_WAIT\_UNTIL}}\label{subsec:shmem_wait_until} \input{content/shmem_wait_until.tex} -\subsubsection{\textbf{SHMEM\_TEST}}\label{subsec:shmem_test} -\input{content/shmem_test.tex} From 63f8aa321290c079a583e46f700db325b02920d8 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Thu, 21 Jun 2018 17:41:21 -0500 Subject: [PATCH 002/319] initial team management routines --- content/shmem_team_destroy.tex | 39 +++++++++++++++ content/shmem_team_my_pe.tex | 2 + content/shmem_team_n_pes.tex | 2 + content/shmem_team_split_3d.tex | 70 +++++++++++++++++++++++++++ content/shmem_team_split_strided.tex | 72 ++++++++++++++++++++++++++++ content/shmem_team_translate.tex | 52 ++++++++++++++++++++ main_spec.tex | 13 +++++ 7 files changed, 250 insertions(+) create mode 100644 content/shmem_team_destroy.tex create mode 100644 content/shmem_team_split_3d.tex create mode 100644 content/shmem_team_split_strided.tex create mode 100644 content/shmem_team_translate.tex diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex new file mode 100644 index 000000000..6496c1204 --- /dev/null +++ b/content/shmem_team_destroy.tex @@ -0,0 +1,39 @@ +\apisummary{ + shmem\_team\_destroy 
function destroys existing team. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmemx\_team\_destroy}@(shmem_team_t newteam); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{IN}{newteam}{A valid SHMEM team handle.} +\end{apiarguments} + +\apidescription{ +The shmemx\_team\_destroy function destroys an existing team. This is a +collective call, in which every member of the team being destroyed needs +to participate. This will free all internal memory structures associated +with the team and invalidate the team handle. Upon return, the team +handle is set to SHMEM\_TEAM\_NULL, after which it can no longer be +used for team collective calls. + +It is considered erroneous to free SHMEM\_TEAM\_WORLD or +SHMEM\_TEAM\_NODE. Error checking will be done to ensure a valid +team handle is provided. All errors are considered fatal, and will +result in the job aborting with an informative error message. +} + +\apireturnvalues{ +None. +} + +\apinotes{ +Note that SHMEM team handles have local semantics only. That is, team +handles should not be stored in shared variables and used across other +processes. Doing so will result in unpredictable behavior. +} + +\end{apidefinition} diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index 531b3f01e..1d423a8ee 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -37,3 +37,5 @@ global PE rank. The SHMEM\_TEAM\_NODE team only contains the set of PEs that reside on the same node as the current PE. } + +\end{apidefinition} diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index 5736a7aa2..b4278cafe 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -37,3 +37,5 @@ global \ac{PE}rank. The SHMEM\_TEAM\_NODE team only contains the set of \acp{PE} that reside on the same node as the current PE. 
} + +\end{apidefinition} diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_3d.tex new file mode 100644 index 000000000..49aae8e15 --- /dev/null +++ b/content/shmem_team_split_3d.tex @@ -0,0 +1,70 @@ +\apisummary{ +shmemx\_team\_split\_3d - partitions an existing parent team into three subgroups, +based on the three-dimensional Cartesian space defined by the triplet (xrange, +yrange, and zrange) describing the size of the Cartesian space in X, Y, and Z +dimensions. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmemx\_team\_split\_3d}@(shmem_team_t parent_team, int xrange, +int yrange, int zrange, shmem_team_t *xaxis_team, shmem_team_t *yaxis_team, +shmem_team_t *zaxis_team); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{IN}{parent\_team}{A valid SHMEM team. The predefined teams +SHMEM\_TEAM\_WORLD or SHMEM\_TEAM\_NODE may be used, or any team created by the +users.} + +\apiargument{IN}{xrange}{A non-negative integer representing the number of +elements in the first dimension.} + +\apiargument{IN}{yrange}{A non-negative integer representing the number of +elements in the second dimension.} + +\apiargument{IN}{zrange}{A non-negative integer representing the number of +elements in the third dimension.} + +\apiargument{OUT}{xaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} +subset consisting of all the \acp{PE} that are in the same row in the X-axis.} + +\apiargument{OUT}{yaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} +subset consisting of all the \acp{PE} that are in the same column in the Y-axis.} + +\apiargument{OUT}{zaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} +subset consisting of all the \acp{PE} that are in the same position in the +Z-axis.} +\end{apiarguments} + +\apidescription{ +The shmemx\_team\_split\_3d routine is a collective routine. 
It +partitions an existing parent team into three subgroups, based on the +three-dimensional Cartesian space defined by the triplet (xrange, +yrange, and zrange) describing the size of the Cartesian space in X, +Y, and Z dimensions. Each subgroup contains all \acp{PE} that are in the same +dimension, along the X-axis, Y-axis and Z-axis. Within each subgroup, +the \acp{PE} are ranked based on the position of the \ac{PE} with respect to its +dimension in three-dimensional Cartesian space. + +Any valid \ac{PE} team can be used as the parent team. This routine must be +called by all \acp{PE} in the parent team. The value of the triplets must be +non-negative, and the size of the parent team should be greater than or +equal to the size of the three-dimensional Cartesian space. None of the +parameters need to reside in symmetric memory. + +Error checking will be done to ensure a valid team handle is provided. +All errors are considered fatal and will result in the job aborting with +an informative error message. +} + +\apireturnvalues{ +None. +} + +\begin{apiexamples} + +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex new file mode 100644 index 000000000..b31805c8d --- /dev/null +++ b/content/shmem_team_split_strided.tex @@ -0,0 +1,72 @@ +\apisummary{ +shmemx\_team\_split\_strided is a collective routine to partition the existing +parent team into a new SHMEM team based on the \ac{PE}triplet (PE\_start, +PE\_stride, and PE\_size) supplied to the function. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmemx\_team\_split\_strided}@(shmem_team_t parent_team, +int PE_start, int PE_stride, int PE_size, shmem_team_t *newteam); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{IN}{parent\_team}{A valid SHMEM team. 
The predefined teams +SHMEM\_TEAM\_WORLD or SHMEM\_TEAM\_NODE may be used, or any team created by the +users.} + +\apiargument{IN}{PE\_start}{The lowest virtual \ac{PE} number of the +parent\_team of \acp{PE}.} + +\apiargument{IN}{PE\_stride}{The stride between consecutive virtual \ac{PE} +numbers in the parent\_team.} + +\apiargument{IN}{PE\_size}{The number of \acp{PE} in the defined set.} + +\apiargument{OUT}{newteam}{A new SHMEM team handle, representing a \ac{PE} +subset of all the \acp{PE}, that is created from the \ac{PE} triplet provided.} +\end{apiarguments} + +\apidescription{ +The shmemx\_team\_split\_strided function is a collective routine. +It partitions the existing parent team into a new SHMEM team based on +the \ac{PE} triplet (PE\_start, PE\_stride, and PE\_size) supplied to +the function. It is important to note the use of the less restrictive +PE\_stride argument instead of logPE\_stride. This method of +creating a team with an arbitrary set of \acp{PE} is inherently restricted by +its parameters, but allows for many additional use-cases over using a +logPE\_stride parameter, and may provide an easier transition for +existing SHMEM programs to create and use SHMEM teams. This function +must be called by all processes contained in the SHMEM triplet +specification. It may be called by additional \acp{PE} not included in the +triplet specification, but for those processes a newteam value of +SHMEM\_TEAM\_NULL is returned. All calling processes must provide the +same values for the \ac{PE} triplet. This function will return a newteam +containing the \ac{PE} subset specified by the triplet, and ordered by the +existing global \ac{PE} rank value. None of the parameters need to reside in +symmetric memory. + +Error checking will be done to ensure a valid \ac{PE} triplet is provided, +and also to determine whether a valid team handle is provided for the +parent\_team. 
+ +All errors are considered fatal and will result in the job aborting with +an informative error message. +} + +\apireturnvalues{ +None. +} + +\apinotes{ +Note that SHMEM team handles have local semantics only. That is, team +handles should not be stored in shared variables and used across other +processes. Doing so will result in unpredictable behavior. +} + +\begin{apiexamples} + +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex new file mode 100644 index 000000000..cfeb4ed9d --- /dev/null +++ b/content/shmem_team_translate.tex @@ -0,0 +1,52 @@ +\apisummary{ + shmemx\_team\_translate\_pe -- Translate a given virtual rank of one team + to its corresponding virtual rank in another team. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmemx\_team\_translate\_pe}@(shmem_team_t team1, int team1_pe, + shmem_team_t team2); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{IN}{team1}{A valid SHMEM team handle.} +\apiargument{IN}{team1\_pe}{A virtual team rank in team1.} +\apiargument{IN}{team2}{A valid SHMEM team handle.} +\end{apiarguments} + +\apidescription{ +The shmemx\_team\_translate\_pe function will translate a virtual rank of +one team to its corresponding virtual rank in another team. +Specifically, given the team1\_pe in team1, this function returns that +\ac{PE}'s virtual rank in team2. + +If SHMEM\_TEAM\_WORLD is provided as the team2 parameter, this function +acts as a global \ac{PE} rank translator and will return the corresponding +SHMEM\_TEAM\_WORLD rank. This may be useful when performing point-to- +point operations between \acp{PE} in a subset, as point-to-point operations +require the global (SHMEM\_TEAM\_WORLD) rank. This function requires +team1\_pe to be a member of team1. If team1\_pe is not a member of +team2, a value of -1 is returned. + +Error checking will be done to ensure valid team handles are provided. 
+All team handle errors are considered fatal and will result in the job +aborting with an informative error message. +} + +\apireturnvalues{ +The virtual rank in team2 of the \ac{PE} identified by team1\_pe in team1, or -1 if that \ac{PE} is not a member of team2. +} + +\apinotes{ +By default, SHMEM creates two predefined teams that will be available +for use once the routine start\_pes has been called. These teams can be +referenced in the application by the constants SHMEM\_TEAM\_WORLD and +SHMEM\_TEAM\_NODE. Every \ac{PE} process is a member of the SHMEM\_TEAM\_WORLD +team, and its rank in SHMEM\_TEAM\_WORLD corresponds to the value of its +global \ac{PE} rank. The SHMEM\_TEAM\_NODE team only contains the set of PEs +that reside on the same node as the current PE. +} + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index 1f5949c60..4f4e32e0c 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -277,6 +277,19 @@ \subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} \subsubsection{\textbf{SHMEM\_TEAM\_N\_PES}}\label{subsec:shmem_team_n_pes} \input{content/shmem_team_n_pes.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_TRANSLATE}}\label{subsec:shmem_team_translate} +\input{content/shmem_team_translate.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_STRIDED}}\label{subsec:shmem_team_split_strided} +\input{content/shmem_team_split_strided.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_3D}}\label{subsec:shmem_team_split_3d} +\input{content/shmem_team_split_3d.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} +\input{content/shmem_team_destroy.tex} + \color{Black} \subsection{Point-To-Point Synchronization Routines}\label{subsec:p2p_intro} From 60219ca6728e2531d18b6cb88720295af909181f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 22 Jun 2018 09:48:31 -0500 Subject: [PATCH 003/319] Manually merge shmem_test.tex include missed in automerge --- main_spec.tex | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/main_spec.tex 
b/main_spec.tex index 4f4e32e0c..01d710835 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -269,6 +269,9 @@ \subsubsection{\textbf{SHMEM\_ALLTOALLS}}\label{subsec:shmem_alltoalls} \input{content/shmem_alltoalls.tex} + + + \color{Green} \subsection{Teams or PE Subsets}\label{subsec:team} @@ -289,15 +292,20 @@ \subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_3D}}\label{subsec:shmem_team_split_3d \subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} \input{content/shmem_team_destroy.tex} - \color{Black} + + + + \subsection{Point-To-Point Synchronization Routines}\label{subsec:p2p_intro} \input{content/p2p_sync_intro.tex} \subsubsection{\textbf{SHMEM\_WAIT\_UNTIL}}\label{subsec:shmem_wait_until} \input{content/shmem_wait_until.tex} +\subsubsection{\textbf{SHMEM\_TEST}}\label{subsec:shmem_test} +\input{content/shmem_test.tex} From 8f393c3d376dd413cb038a34a5fab8778af70115 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 22 Jun 2018 10:10:21 -0500 Subject: [PATCH 004/319] Added notes section to team split 3d --- content/shmem_team_split_3d.tex | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_3d.tex index 49aae8e15..55d2bfa9e 100644 --- a/content/shmem_team_split_3d.tex +++ b/content/shmem_team_split_3d.tex @@ -63,6 +63,12 @@ None. } +\apinotes{ +Note that SHMEM team handles have local semantics only. That is, team +handles should not be stored in shared variables and used across other +processes. Doing so will result in unpredictable behavior. 
+} + \begin{apiexamples} \end{apiexamples} From ac209b49445e05d09d633fbc29667df89ec1cf8a Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 10:05:11 -0500 Subject: [PATCH 005/319] Added teams handles and constants --- content/library_constants.tex | 12 ++++++++++++ content/library_handles.tex | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/content/library_constants.tex b/content/library_constants.tex index 599da54db..26c99494b 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -39,6 +39,18 @@ See Section~\ref{subsec:thread_support} for more detail about its use. \tabularnewline \hline %% +\LibConstDecl{SHMEM\_TEAM\_NOCOLLECTIVE} & +The team creation option which specifies that the new team will not +be initialized with support for team collective operations. +See Section~\ref{subsec:team} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_TEAM\_NULL} & +Predefined constant that can be compared against handles of type +\CTYPE{shmem\_team\_t} to determine if they refer to a valid team. +See Section~\ref{subsec:team} for more detail about its use. +\tabularnewline \hline +%% \LibConstDecl{SHMEM\_CTX\_SERIALIZED} & The context creation option which specifies that the given context is shareable but will not be used by multiple threads concurrently. diff --git a/content/library_handles.tex b/content/library_handles.tex index d2ec45a48..50e28b59f 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -13,6 +13,30 @@ \tabularnewline \hline \endhead %% +\LibHandleDecl{SHMEM\_TEAM\_WORLD} & +Handle of type \CTYPE{shmem\_team\_t} that corresponds to the +default team of all pes in the OpenSHMEM program. All point-to-point +communication operations and synchronizations that do not specify a team +are performed on the default team. +See Section~\ref{subsec:team} for more detail about its use. 
+\tabularnewline \hline +%% +\LibHandleDecl{SHMEM\_TEAM\_NODE} & +Handle of type \CTYPE{shmem\_team\_t} that corresponds to a team of pes +which share node level resources, such as shared memory, network +interfaces, etc. When this handle is used by some pe, it will refer +to the node level team containing that pe. +See Section~\ref{subsec:team} for more detail about its use. +\tabularnewline \hline +%% +\LibHandleDecl{SHMEM\_TEAM\_WORLD} & +Handle of type \CTYPE{shmem\_team\_t} that corresponds to the +default team of all pes in the OpenSHMEM program. All point-to-point +communication operations and synchronizations that do not specify a team +are performed on the default team. +See Section~\ref{subsec:team} for more detail about its use. +\tabularnewline \hline +%% \LibHandleDecl{SHMEM\_CTX\_DEFAULT} & Handle of type \CTYPE{shmem\_ctx\_t} that corresponds to the default communication context. All point-to-point communication operations From de0b60c08d009a3ab2959bdaa55bb0fb6f35505d Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 10:51:34 -0500 Subject: [PATCH 006/319] Added API call shmem_team_get_options --- content/shmem_team_get_options.tex | 44 ++++++++++++++++++++++++++++++ main_spec.tex | 3 ++ 2 files changed, 47 insertions(+) create mode 100644 content/shmem_team_get_options.tex diff --git a/content/shmem_team_get_options.tex b/content/shmem_team_get_options.tex new file mode 100644 index 000000000..b878da372 --- /dev/null +++ b/content/shmem_team_get_options.tex @@ -0,0 +1,44 @@ +\apisummary{ + shmem\_team\_get\_options returns the options flags describing a given team +} + +\begin{apidefinition} + +\begin{Csynopsis} +long @\FuncDecl{shmem\_team\_get\_options}@(shmem_team_t team); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{IN}{team}{A valid SHMEM team handle.} +\end{apiarguments} + +\apidescription{ +\FUNC{shmem\_team\_get\_options} returns a long unsigned value containing +all of the options which describe the given team. 
Options are requested when +new teams are created in the various \FUNC{shmem\_team\_split\_*} functions. +Whichever of the requested options are applied to the team by the library +implementation will be returned by \FUNC{shmem\_team\_get\_options}. + +All processes in the team will get back the same value for the team options. + +Error checking will be done to ensure a valid team handle is provided. +All errors are considered fatal and will result in the job aborting +with an informative error message. +} + +\apireturnvalues{ +The set of options applied to the given team. Multiple options are combined +with a bitwise OR and can be extracted with a bitwise AND. A return value of +\CONST{0} implies that the team uses all default options. +} + +\apinotes{ +A use case for this function is to determine if a given team will +support collective operations by testing for the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} +option. When teams are created without support for collectives, they may still use +point to point operations to communicate and synchronize. So programmers may wish +to design frameworks with functions that provide alternative algorithms +for teams based on whether they do or do not support collectives. 
+} + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index 01d710835..efeb0944b 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -281,6 +281,9 @@ \subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} \subsubsection{\textbf{SHMEM\_TEAM\_N\_PES}}\label{subsec:shmem_team_n_pes} \input{content/shmem_team_n_pes.tex} +\subsubsection{\textbf{SHMEM\_TEAM\_GET\_OPTIONS}}\label{subsec:shmem_team_get_options} +\input{content/shmem_team_get_options.tex} + \subsubsection{\textbf{SHMEM\_TEAM\_TRANSLATE}}\label{subsec:shmem_team_translate} \input{content/shmem_team_translate.tex} From c4ec325a2458482eb57c79c89b8f5fe313014fc1 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 12:11:53 -0500 Subject: [PATCH 007/319] Added API description for shmem_team_broadcast --- content/shmem_team_broadcast.tex | 88 ++++++++++++++++++++++++++++++++ main_spec.tex | 3 ++ 2 files changed, 91 insertions(+) create mode 100644 content/shmem_team_broadcast.tex diff --git a/content/shmem_team_broadcast.tex b/content/shmem_team_broadcast.tex new file mode 100644 index 000000000..aeca874ea --- /dev/null +++ b/content/shmem_team_broadcast.tex @@ -0,0 +1,88 @@ +\apisummary{ + Broadcasts a block of data from one \ac{PE} to one or more destination + \acp{PE}. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_team\_broadcast32}@(shmem_team_t team, void *dest, const void *source, size_t nelems, int PE_root); +void @\FuncDecl{shmem\_team\_broadcast64}@(shmem_team_t team, void *dest, const void *source, size_t nelems, int PE_root); +\end{Csynopsis} + +\begin{apiarguments} + +\apiargument{IN}{team}{A valid SHMEM team handle to a team which has been created with support for collective operations.} +\apiargument{OUT}{dest}{A symmetric data object.} +\apiargument{IN}{source}{A symmetric data object that can be of any data type + that is permissible for the \dest{} argument.} +\apiargument{IN}{nelems}{The number of elements in \source. 
For + \FUNC{shmem\_team\_broadcast32}, this is the number of + 32-bit halfwords. nelems must be of type \VAR{size\_t} in \Cstd.} +\apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to + the team, from which the data is copied. Must be greater than or equal to + 0 and less than the result of calling \FUNC{shmem\_team\_n\_pes(team)}. + \VAR{PE\_root} must be of type integer.} + +\end{apiarguments} + +\apidescription{ + \openshmem broadcast routines are collective routines over an existing team. + They copy data object \source{} on the processor specified by \VAR{PE\_root} + and store the values at \dest{} on the other \acp{PE} that are members of the + team. The data is not copied to the \dest{} area on the root \ac{PE}. + + As with all \openshmem team collective routines, each of these routines assumes that + only \acp{PE} in the given team call the routine. If a \ac{PE} not in the + team calls an \openshmem team collective routine, the behavior is undefined. + + If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, + it will not have the required support structures to complete this routine. If + such a team is passed to this or any other team collective routine, the behavior + is undefined. + + As with all \openshmem routines where the operation occurs for a given team - + either when the team is an argument to the routine, or when the team is an attribute + of the context argument to a routine - the \ac{PE} numbers are relative to the team, + and must be in the range of 0 to the result of \FUNC{shmem\_team\_n\_pes(team)}. + + The values of the argument \VAR{PE\_root} must be the same value on all \acp{PE} in + the team. The same \dest{} and \source{} data objects must be passed by all \acp{PE} + in the team. + + Upon return from a broadcast routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item If the current \ac{PE} is not the root \ac{PE}, + the \dest{} data object is updated. 
+ \item The \source{} data object may be safely reused. + \end{itemize} +} + +\apidesctable{ +The \dest{} and \source{} data objects must conform to certain typing +constraints, which are as follows: +}{Routine}{Data type of \VAR{dest} and \VAR{source}} + +\apitablerow{shmem\_team\_broadcast64}{Any noncharacter + type that has an element size of \CONST{64} bits. No + \CorCpp{} structures are allowed.} +\apitablerow{shmem\_team\_broadcast32}{Any noncharacter + type that has an element size of \CONST{32} bits. No + \CorCpp{} structures are allowed.} + +\apireturnvalues{ + None. +} + +\apinotes{ + All \openshmem team collective routines use symmetric data structures associated + with the team to synchronize and share data. By default, new teams that result from + split operations will have these structures. + + Multiple calls to the same collective routine for the same team by different threads + must avoid any simultaneous updates to these structures. In general, this will mean + that threads will need to serialize access to teams. 
+} + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index 01d710835..cbb4036b5 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -253,6 +253,9 @@ \subsubsection{\textbf{SHMEM\_SYNC\_ALL}}\label{subsec:shmem_sync_all} \subsubsection{\textbf{SHMEM\_SYNC}}\label{subsec:shmem_sync} \input{content/shmem_sync.tex} +\subsubsection{\textbf{SHMEM\_TEAM\_BROADCAST}}\label{subsec:shmem_team_broadcast} +\input{content/shmem_team_broadcast.tex} + \subsubsection{\textbf{SHMEM\_BROADCAST}}\label{subsec:shmem_broadcast} \input{content/shmem_broadcast.tex} From 4adb5469563a07981a02d96a5c77a66e9b4853c8 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 15:56:15 -0500 Subject: [PATCH 008/319] Update team_split_strided description and params --- content/shmem_team_split_strided.tex | 69 ++++++++++++++++++---------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index b31805c8d..beb8dee39 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -1,48 +1,57 @@ \apisummary{ -shmemx\_team\_split\_strided is a collective routine to partition the existing -parent team into a new SHMEM team based on the \ac{PE}triplet (PE\_start, -PE\_stride, and PE\_size) supplied to the function. 
-} +shmem\_team\_split\_strided is a collective routine to create a new SHMEM team from +a subset of the existing parent team \acp{PE}, where the subset is defined by the +\ac{PE} triplet (PE\_start, PE\_stride, and PE\_size) supplied to the function.} \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmemx\_team\_split\_strided}@(shmem_team_t parent_team, -int PE_start, int PE_stride, int PE_size, shmem_team_t *newteam); +void @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, long options, +int PE_start, int PE_stride, int PE_size, shmem_team_t *new_team); \end{Csynopsis} \begin{apiarguments} \apiargument{IN}{parent\_team}{A valid SHMEM team. The predefined teams -SHMEM\_TEAM\_WORLD or SHMEM\_TEAM\_NODE may be used, or any team created by the -users.} +SHMEM\_TEAM\_WORLD or SHMEM\_TEAM\_NODE may be used, or any team created by the user.} + +\apiargument{IN}{options}{The set of options requested for the new\_team. +Multiple options may be requested by combining them with a bitwise OR operation; +otherwise, \CONST{0} can be given if no options are requested.} -\apiargument{IN}{PE\_start}{The lowest virtual \ac{PE} number of the -parent\_team of \acp{PE}.} +\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the subset of \acp{PE} from +the parent\_team that will form the new\_team} -\apiargument{IN}{PE\_stride}{The stride between consecutive virtual \ac{PE} -numbers in the parent\_team.} +\apiargument{IN}{PE\_stride}{The stride between team \ac{PE} +numbers in the parent\_team that comprise the subset of \acp{PE} that will form +the new team.} -\apiargument{IN}{PE\_size}{The number of \acp{PE} in the defined set.} +\apiargument{IN}{PE\_size}{The number of \acp{PE} from the parent\_team in the subset +of \acp{PE} that will form the new\_team.} + +\apiargument{OUT}{new\_team}{A new SHMEM team handle, representing a \ac{PE} +subset of all the \acp{PE} in the parent\_team that is created from +the \ac{PE} triplet provided.} 
-\apiargument{OUT}{newteam}{A new SHMEM team handle, representing a \ac{PE} -subset of all the \acp{PE}, that is created from the \ac{PE} triplet provided.} \end{apiarguments} \apidescription{ -The shmemx\_team\_split\_strided function is a collective routine. -It partitions the existing parent team into a new SHMEM team based on -the \ac{PE} triplet (PE\_start, PE\_stride, and PE\_size) supplied to -the function. It is important to note the use of the less restrictive +The shmem\_team\_split\_strided function is a collective routine. +It creates a new SHMEM team from a subset of the existing parent team \acp{PE}, +where the subset is defined by the \ac{PE} triplet (PE\_start, PE\_stride, and PE\_size) +supplied to the function. + +It is important to note the use of the less restrictive PE\_stride argument instead of logPE\_stride. This method of creating a team with an arbitrary set of \acp{PE} is inherently restricted by its parameters, but allows for many additional use-cases over using a logPE\_stride parameter, and may provide an easier transition for -existing SHMEM programs to create and use SHMEM teams. This function -must be called by all processes contained in the SHMEM triplet +existing SHMEM programs to create and use SHMEM teams. + +This function must be called by all processes contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the -triplet specification, but for those processes a newteam value of +triplet specification, but for those processes a new\_team value of SHMEM\_TEAM\_NULL is returned. All calling processes must provide the -same values for the \ac{PE} triplet. This function will return a newteam +same values for the \ac{PE} triplet. This function will return a new\_team containing the \ac{PE} subset specified by the triplet, and ordered by the existing global \ac{PE} rank value. None of the parameters need to reside in symmetric memory. 
@@ -53,6 +62,20 @@ All errors are considered fatal and will result in the job aborting with an informative error message. + +The following options can be supplied during team split to restrict +team functions and enable performance optimizations. When using a given +team, the application must comply with the requirements of all options +set on that team; otherwise, the behavior is undefined. +No options are enabled on \CONST{SHMEM\_TEAM\_WORLD} or \CONST{SHMEM\_TEAM\_NODE}. + + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ + The new team will not be created with the necessary support + structures to enable team based collectives. + This will typically allow implementations to speed up team creation + and reduce \openshmem library footprint for teams with this option. + This option will not prevent the new team from using atomics or + other non-collective team based operations.} } \apireturnvalues{ From 2492205339c6595c1194add7653791ef6ea5eda0 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 16:24:48 -0500 Subject: [PATCH 009/319] Fix reference format on SHMEM_TEAM_NULL --- content/shmem_team_split_strided.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index beb8dee39..d0e26ac7f 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -50,7 +50,7 @@ This function must be called by all processes contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the triplet specification, but for those processes a new\_team value of -SHMEM\_TEAM\_NULL is returned. All calling processes must provide the +\CONST{SHMEM\_TEAM\_NULL} is returned. All calling processes must provide the same values for the \ac{PE} triplet. This function will return a new\_team containing the \ac{PE} subset specified by the triplet, and ordered by the existing global \ac{PE} rank value. 
None of the parameters need to reside in From 106239af5ef1743a4cffb19049d47f69c76138ce Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 17:12:35 -0500 Subject: [PATCH 010/319] Minor fix split strided formatting --- content/shmem_team_split_strided.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index d0e26ac7f..45ca5ccac 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -1,6 +1,6 @@ \apisummary{ -shmem\_team\_split\_strided is a collective routine to create a new SHMEM team from -a subset of the existing parent team \acp{PE}, where the subset is defined by the +Create a new SHMEM team from a subset of the existing parent team \acp{PE}, +where the subset is defined by the \ac{PE} triplet (PE\_start, PE\_stride, and PE\_size) supplied to the function.} \begin{apidefinition} @@ -35,7 +35,7 @@ \end{apiarguments} \apidescription{ -The shmem\_team\_split\_strided function is a collective routine. +The \FUNC{shmem\_team\_split\_strided} function is a collective routine. It creates a new SHMEM team from a subset of the existing parent team \acp{PE}, where the subset is defined by the \ac{PE} triplet (PE\_start, PE\_stride, and PE\_size) supplied to the function. @@ -45,7 +45,7 @@ creating a team with an arbitrary set of \acp{PE} is inherently restricted by its parameters, but allows for many additional use-cases over using a logPE\_stride parameter, and may provide an easier transition for -existing SHMEM programs to create and use SHMEM teams. +existing \openshmem programs to create and use \openshmem teams. This function must be called by all processes contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the @@ -53,7 +53,7 @@ \CONST{SHMEM\_TEAM\_NULL} is returned. All calling processes must provide the same values for the \ac{PE} triplet. 
This function will return a new\_team containing the \ac{PE} subset specified by the triplet, and ordered by the -existing global \ac{PE} rank value. None of the parameters need to reside in +existing global \ac{PE} number. None of the parameters need to reside in symmetric memory. Error checking will be done to ensure a valid \ac{PE} triplet is provided, From 4c6581756a35e6631fcd04c40f521b79d1c7e5c5 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 17:13:10 -0500 Subject: [PATCH 011/319] Update formatting and terms on shmem_team_my_pe --- content/shmem_team_my_pe.tex | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index 1d423a8ee..6265166be 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -1,23 +1,23 @@ \apisummary{ - shmem\_team\_my\_pe returns the calling process's virtual rank in the - provided team. + Returns the number of calling \ac{PE} within the provided team. } \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmemx\_team\_my\_pe}@(shmem_team_t newteam); +int @\FuncDecl{shmem\_team\_my\_pe}@(shmem_team_t team); \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{newteam}{A valid SHMEM team handle.} +\apiargument{IN}{team}{A valid SHMEM team handle.} \end{apiarguments} \apidescription{ -The shmemx\_team\_my\_pe function returns the calling process's virtual -rank in the provided team. The rank will be a value between 0 and N-1, -for a team of size N. Different members of a team cannot have the same -rank. For the team SHMEM\_TEAM\_WORLD, this will return shmem\_my\_pe. +The \FUNC{shmem\_team\_my\_pe} function returns the number of calling \ac{PE} within the +provided team. The number will be a value between 0 and N-1, +for a team of size N. Each member of the team has a unique number. +For the team \CONST{SHMEM\_TEAM\_WORLD}, this will return the same value +as \FUNC{shmem\_my\_pe}. 
Error checking will be done to ensure a valid team handle is provided. All errors are considered fatal, and will result in the job aborting @@ -25,17 +25,17 @@ } \apireturnvalues{ -Calling process's virtual rank in the provided team. +The number of the calling \ac{PE} within the provided team. } \apinotes{ -By default, SHMEM creates two predefined teams that will be available -for use once the routine start\_pes has been called. These teams can be -referenced in the application by the constants SHMEM\_TEAM\_WORLD and -SHMEM\_TEAM\_NODE. Every PE process is a member of the SHMEM\_TEAM\_WORLD -team, and its rank in SHMEM\_TEAM\_WORLD corresponds to the value of its -global PE rank. The SHMEM\_TEAM\_NODE team only contains the set of PEs -that reside on the same node as the current PE. +By default, \openshmem creates two predefined teams that will be available +for use once the routine \FUNC{shmem\_init} has been called. These teams can be +referenced in the application by the handles \CONST{SHMEM\_TEAM\_WORLD} and +\CONST{SHMEM\_TEAM\_NODE}. Every PE process is a member of the \CONST{SHMEM\_TEAM\_WORLD} +team, and its number in \CONST{SHMEM\_TEAM\_WORLD} corresponds to the value of its +global \ac{PE} number. The \CONST{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} +that reside on the same node as the current \ac{PE}. } \end{apidefinition} From 2946024ab6822fe4cdc94e4ce76ee603ec28ff24 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 17:13:29 -0500 Subject: [PATCH 012/319] Update formatting and terms on shmem_team_n_pes --- content/shmem_team_n_pes.tex | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index b4278cafe..791958d83 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -1,22 +1,24 @@ \apisummary{ - shmem\_team\_n\_pes returns the total number of \acp{PE} in the provided team. 
+ Returns the total number of \acp{PE} in the provided team. } \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmemx\_team\_n\_pes}@(shmem_team_t newteam); +int @\FuncDecl{shmem\_team\_n\_pes}@(shmem_team_t team); \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{newteam}{A valid SHMEM team handle.} +\apiargument{IN}{team}{A valid SHMEM team handle.} \end{apiarguments} \apidescription{ -The shmemx\_team\_n\_pes function returns the number of processes in the -team. This will always be a value between 1 and the total number of -\acp{PE}. For the team SHMEM\_TEAM\_WORLD, this will return shmem\_n\_pes. -Every team must have a least one member. All processes in the team +The \FUNC{shmem\_team\_n\_pes} function returns the number of \acp{PE} in the +team. This will always be a value between 1 and N, where N is the total number of +\acp{PE} accessible to the \openshmem program. For the team \CONST{SHMEM\_TEAM\_WORLD}, +this will return the same value as \FUNC{shmem\_n\_pes}. + +Every team must have a least one member. All \acp{PE} in the team will get back the same value for the team size. Error checking will be done to ensure a valid team handle is provided. @@ -29,12 +31,12 @@ } \apinotes{ -By default, SHMEM creates two predefined teams that will be available -for use once the routine start\_pes has been called. These teams can be -referenced in the application by the constants SHMEM\_TEAM\_WORLD and -SHMEM\_TEAM\_NODE. Every \ac{PE}process is a member of the SHMEM\_TEAM\_WORLD -team, and its rank in SHMEM\_TEAM\_WORLD corresponds to the value of its -global \ac{PE}rank. The SHMEM\_TEAM\_NODE team only contains the set of \acp{PE} +By default, \openshmem creates two predefined teams that will be available +for use once the routine \FUNC{shmem\_init} has been called. These teams can be +referenced in the application by the constants \CONST{SHMEM\_TEAM\_WORLD} and +\CONST{SHMEM\_TEAM\_NODE}. 
Every \ac{PE}process is a member of the \CONST{SHMEM\_TEAM\_WORLD} +team, and its number in \CONST{SHMEM\_TEAM\_WORLD} corresponds to the value of its +global \ac{PE} number. The \CONST{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} that reside on the same node as the current PE. } From fa5762840085ff24572172f699616d2b1abeefc6 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 17:13:56 -0500 Subject: [PATCH 013/319] Update formatting and terms on shmem_team_split_3d --- content/shmem_team_split_3d.tex | 65 ++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_3d.tex index 55d2bfa9e..fac270017 100644 --- a/content/shmem_team_split_3d.tex +++ b/content/shmem_team_split_3d.tex @@ -1,22 +1,25 @@ \apisummary{ -shmemx\_team\_split\_3d - partitions an existing parent team into three subgroups, -based on the three-dimensional Cartesian space defined by the triplet (xrange, -yrange, and zrange) describing the size of the Cartesian space in X, Y, and Z -dimensions. -} +Create up to three new teams +by splitting an existing parent team into up to three subsets based on a +2D or 3D Cartesian space defined by the triplet (xrange, yrange, and zrange) +describing the size of the Cartesian space in X, Y, and Z dimensions.} \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmemx\_team\_split\_3d}@(shmem_team_t parent_team, int xrange, -int yrange, int zrange, shmem_team_t *xaxis_team, shmem_team_t *yaxis_team, +void @\FuncDecl{shmem\_team\_split\_3d}@(shmem_team_t parent_team, long options, +int xrange, int yrange, int zrange, shmem_team_t *xaxis_team, shmem_team_t *yaxis_team, shmem_team_t *zaxis_team); \end{Csynopsis} \begin{apiarguments} \apiargument{IN}{parent\_team}{A valid SHMEM team. 
The predefined teams SHMEM\_TEAM\_WORLD or SHMEM\_TEAM\_NODE may be used, or any team created by the -users.} +user.} + +\apiargument{IN}{options}{The set of options requested for the new teams. +Multiple options may be requested by combining them with a bitwise OR operation; +otherwise, \CONST{0} can be given if no options are requested.} \apiargument{IN}{xrange}{A non-negative integer representing the number of elements in the first dimension.} @@ -25,7 +28,8 @@ elements in the second dimension.} \apiargument{IN}{zrange}{A non-negative integer representing the number of -elements in the third dimension.} +elements in the third dimension. \CONST{0} can be given if the defined space +has no third dimension.} \apiargument{OUT}{xaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} subset consisting of all the \acp{PE} that are in the same row in the X-axis.} @@ -39,24 +43,49 @@ \end{apiarguments} \apidescription{ -The shmemx\_team\_split\_3d routine is a collective routine. It -partitions an existing parent team into three subgroups, based on the -three-dimensional Cartesian space defined by the triplet (xrange, -yrange, and zrange) describing the size of the Cartesian space in X, -Y, and Z dimensions. Each subgroup contains all \acp{PE} that are in the same -dimension, along the X-axis, Y-axis and Z-axis. Within each subgroup, -the \acp{PE} are ranked based on the position of the \ac{PE} with respect to its +The shmem\_team\_split\_3d routine is a collective routine. It creates up to three new teams +by splitting an existing parent team into up to three subsets based on a +1D, 2D or 3D Cartesian space defined by the triplet (xrange, yrange, and zrange) +describing the size of the Cartesian space in X, Y, and Z dimensions. + +A 2D Cartesian space may be defined by passing \CONST{0} for the zrange +parameter. In this case, the zaxis\_team argument may be passed a NULL +pointer. 
If it is not passed as a NULL pointer, the zaxis\_team will be returned +as \CONST{SHMEM\_TEAM\_NULL}. + +Each subset contains all \acp{PE} that are in the same +dimension, along the X-axis, Y-axis and Z-axis. Within each subset, +the \acp{PE} are numbered based on the position of the \ac{PE} with respect to its dimension in three-dimensional Cartesian space. Any valid \ac{PE} team can be used as the parent team. This routine must be called by all \acp{PE} in the parent team. The value of the triplets must be non-negative, and the size of the parent team should be greater than or -equal to the size of the three-dimensional Cartesian space. None of the -parameters need to reside in symmetric memory. +equal to the number of discrete (x,y,z) points in the Cartesian space. +None of the parameters need to reside in symmetric memory. + +In the event that the Cartesian space contains fewer points than there are \acp{PE} +in the parent\_team, the first N \acp{PE} in the parent team will be used +to form the new teams, where N is equal to the number of points in the +Cartesian space. Error checking will be done to ensure a valid team handle is provided. All errors are considered fatal and will result in the job aborting with an informative error message. + +The following options can be supplied during team split to restrict +team functions and enable performance optimizations. When using a given +team, the application must comply with the requirements of all options +set on that team; otherwise, the behavior is undefined. +No options are enabled on \CONST{SHMEM\_TEAM\_WORLD} or \CONST{SHMEM\_TEAM\_NODE}. + + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ + The new team will not be created with the necessary support + structures to enable team based collectives. + This will typically allow implementations to speed up team creation + and reduce \openshmem library footprint for teams with this option. 
+ This option will not prevent the new team from using atomics or + other non-collective team based operations.} } \apireturnvalues{ From acbcaf754aa52c6cdad65da27a3f56e5d984bbb2 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 17:14:32 -0500 Subject: [PATCH 014/319] Update formatting and terms on shmem_team_translate --- content/shmem_team_translate.tex | 49 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex index cfeb4ed9d..7fff9009d 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate.tex @@ -1,34 +1,33 @@ \apisummary{ - shmemx\_team\_translate\_pe -- Translate a given virtual rank of one team - to its corresponding virtual rank in another team. + Translates a given \ac{PE} number to the corresponding \ac{PE} number in another team. } \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmemx\_team\_translate\_pe}@(shmem_team_t team1, int team1_pe, - shmem_team_t team2); +int @\FuncDecl{shmem\_team\_translate\_pe}@(shmem_team_t src_team, int src_pe, + shmem_team_t dest_team); \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team1}{A valid SHMEM team handle.} -\apiargument{IN}{team1\_pe}{A virtual team rank in team1.} -\apiargument{IN}{team2}{A valid SHMEM team handle.} +\apiargument{IN}{src\_team}{A valid SHMEM team handle.} +\apiargument{IN}{src\_pe}{A \ac{PE} number in src\_team.} +\apiargument{IN}{dest\_team}{A valid SHMEM team handle.} \end{apiarguments} \apidescription{ -The shmemx\_team\_translate\_pe function will translate a virtual rank of -one team to its corresponding virtual rank in another team. -Specifically, given the team1\_pe in team1, this function returns that -\ac{PE}'s virtual rank in team2. - -If SHMEM\_TEAM\_WORLD is provided as the team2 parameter, this function -acts as a global \ac{PE} rank translator and will return the corresponding -SHMEM\_TEAM\_WORLD rank. 
This may be useful when performing point-to- +The \FUNC{shmem\_team\_translate\_pe} function will translate a given \ac{PE} number +to the corresponding \ac{PE} number in another team. +Specifically, given the src\_pe in src\_team, this function returns that +\ac{PE}'s number in dest\_team. If src\_pe is not a member of +dest\_team, a value of -1 is returned. + +If \CONST{SHMEM\_TEAM\_WORLD} is provided as the dest\_team parameter, this function +acts as a global \ac{PE} number translator and will return the corresponding +\CONST{SHMEM\_TEAM\_WORLD} number. This may be useful when performing point-to- point operations between \acp{PE} in a subset, as point-to-point operations -require the global (SHMEM\_TEAM\_WORLD) rank. This function requires -team1\_pe to be a member of team1. If team1\_pe is not a member of -team2, a value of -1 is returned. +that do not take a context argument require the global \CONST{SHMEM\_TEAM\_WORLD} +\ac{PE} number. Error checking will be done to ensure valid team handles are provided. All team handle errors are considered fatal and will result in the job @@ -36,16 +35,16 @@ } \apireturnvalues{ -Calling process's virtual rank in the provided team. +The specified \ac{PE}'s number in the dest\_team. } \apinotes{ -By default, SHMEM creates two predefined teams that will be available -for use once the routine start\_pes has been called. These teams can be -referenced in the application by the constants SHMEM\_TEAM\_WORLD and -SHMEM\_TEAM\_NODE. Every \ac{PE} process is a member of the SHMEM\_TEAM\_WORLD -team, and its rank in SHMEM\_TEAM\_WORLD corresponds to the value of its -global \ac{PE} rank. The SHMEM\_TEAM\_NODE team only contains the set of PEs +By default, \openshmem creates two predefined teams that will be available +for use once the routine \FUNC{shmem\_init} has been called. These teams can be +referenced in the application by the constants \CONST{SHMEM\_TEAM\_WORLD} and +\CONST{SHMEM\_TEAM\_NODE}. 
Every \ac{PE}process is a member of the \CONST{SHMEM\_TEAM\_WORLD} +team, and its number in \CONST{SHMEM\_TEAM\_WORLD} corresponds to the value of its +global \ac{PE} number. The \CONST{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} that reside on the same node as the current PE. } From 282aaf13ae3b51d6144e87e99a974ae024e5e75c Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Jun 2018 17:15:02 -0500 Subject: [PATCH 015/319] Update formatting and terms on shmem_team_destroy --- content/shmem_team_destroy.tex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 6496c1204..4f2b49dbd 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -1,27 +1,27 @@ \apisummary{ - shmem\_team\_destroy function destroys existing team. + Destroys existing team. } \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmemx\_team\_destroy}@(shmem_team_t newteam); +int @\FuncDecl{shmem\_team\_destroy}@(shmem_team_t team); \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{newteam}{A valid SHMEM team handle.} +\apiargument{IN}{team}{A valid SHMEM team handle.} \end{apiarguments} \apidescription{ -The shmemx\_team\_destroy function destroys an existing team. This is a +The shmem\_team\_destroy function destroys an existing team. This is a collective call, in which every member of the team being destroyed needs to participate. This will free all internal memory structures associated with the team and invalidate the team handle. Upon return, the team -handle is set to SHMEM\_TEAM\_NULL, after which it can no longer be -used for team collective calls. +handle is set to \CONST{SHMEM\_TEAM\_NULL}, after which it can no longer be +used for team API calls. -It is considered erroneous to free SHMEM\_TEAM\_WORLD or -SHMEM\_TEAM\_NODE. 
Error checking will be done to ensure a valid +It is considered erroneous to free \CONST{SHMEM\_TEAM\_WORLD} or +\CONST{SHMEM\_TEAM\_NODE}. Error checking will be done to ensure a valid team handle is provided. All errors are considered fatal, and will result in the job aborting with an informative error message. } From d2317165e578e6dbd6c2b6d71f9804c545add52b Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 11:27:15 -0500 Subject: [PATCH 016/319] Added shmem_team_collect section --- content/shmem_team_collect.tex | 99 ++++++++++++++++++++++++++++++++++ main_spec.tex | 3 ++ 2 files changed, 102 insertions(+) create mode 100644 content/shmem_team_collect.tex diff --git a/content/shmem_team_collect.tex b/content/shmem_team_collect.tex new file mode 100644 index 000000000..7cf03cf20 --- /dev/null +++ b/content/shmem_team_collect.tex @@ -0,0 +1,99 @@ +\apisummary{ + Concatenates blocks of data from multiple \acp{PE} int a team to an array in every + \ac{PE} in the team. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_team\_collect32}@(shmem_team_t team, void *dest, const void *source, size_t nelems); +void @\FuncDecl{shmem\_team\_collect64}@(shmem_team_t team, void *dest, const void *source, size_t nelems); +\end{Csynopsis} + +\begin{apiarguments} + +\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been + created without disabling support for collective operations.} +\apiargument{OUT}{dest}{A symmetric array large enough + to accept the concatenation of the \source{} arrays on all \acp{PE} in the team. + See table below in this description for allowable data types.} +\apiargument{IN}{source}{A symmetric data object that can be of any type permissible + for the \dest{} argument.} +\apiargument{IN}{nelems}{The number of elements in the \source{} array. 
\VAR{nelems} + must be of type \VAR{size\_t}.} + +\end{apiarguments} + +\apidescription{ + \openshmem \FUNC{team\_collect} and \FUNC{team\_fcollect} are collective routines + over an existing team. These routines concatenate \VAR{nelems} + \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the + \dest{} array, over all \acp{PE} in the specified \VAR{team} in processor number order. + The resultant \dest{} array contains the contribution from \ac{PE} with \VAR{team} number 0 + first, then the contribution from \ac{PE} with \VAR{team} number 1, and so on. + The collected result is written to the \dest{} array for all \acp{PE} in the team. + + The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all + participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to + vary from \ac{PE} to \ac{PE}. + + As with all \openshmem team collective routines, each of these routines assumes that + only \acp{PE} in the given team call the routine. If a \ac{PE} not in the + team calls an \openshmem team collective routine, the behavior is undefined. + + If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, + it will not have the required support structures to complete this routine. If + such a team is passed to this or any other team collective routine, the behavior + is undefined. + + As with all \openshmem routines where the operation occurs for a given team - + either when the team is an argument to the routine, or when the team is an attribute + of the context argument to a routine - the \ac{PE} numbers are relative to the team, + and must be in the range of 0 to N-1, where N is the result of \FUNC{shmem\_team\_n\_pes(team)}. + + The same \dest{} and \source{} data objects must be passed by all \acp{PE} + in the team. + + Upon return from a collective routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item The \dest{} array is updated. 
+ \item The \source{} array may be safely reused. + \end{itemize} + + Error checking will be done to ensure a valid team handle is provided. + All errors are considered fatal and will result in the job aborting + with an informative error message. +} + +\apidesctable{ +The \dest{} and \source{} data objects must conform to certain typing +constraints, which are as follows: +}{Routine}{Data type of \VAR{dest} and \VAR{source}} + +\apitablerow{shmem\_team\_collect64, shmem\_team\_fcollect64}{Any noncharacter + type that has an element size of \CONST{64} bits. + \CorCpp{} structures are NOT allowed.} +\apitablerow{shmem\_team\_collect32, shmem\_team\_fcollect32}{Any noncharacter + type that has an element size of \CONST{32} bits. + \CorCpp{} structures are NOT allowed.} + +\apireturnvalues{ + None. +} + +\apinotes{ + All \openshmem team collective routines use symmetric data structures associated + with the team to synchronize and share data. By default, new teams that result from + split operations will have these structures. + + Multiple calls to the same collective routine for the same team by different threads + must avoid any simultaneous updates to these structures. In general, this will mean + that threads will need to serialize access to teams. 
+} + +\begin{apiexamples} + +\end{apiexamples} + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index 01d710835..9d27b6b58 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -259,6 +259,9 @@ \subsubsection{\textbf{SHMEM\_BROADCAST}}\label{subsec:shmem_broadcast} \subsubsection{\textbf{SHMEM\_COLLECT, SHMEM\_FCOLLECT}}\label{subsec:shmem_collect} \input{content/shmem_collect.tex} +\subsubsection{\textbf{SHMEM\_TEAM\_COLLECT, SHMEM\_TEAM\_FCOLLECT}}\label{subsec:shmem_team_collect} +\input{content/shmem_team_collect.tex} + \subsubsection{\textbf{SHMEM\_REDUCTIONS}}\label{subsec:shmem_reductions} \input{content/shmem_reductions.tex} From 765225089643d0174559b87c77a2398148c2ec14 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 11:35:38 -0500 Subject: [PATCH 017/319] Fix formatting of terms and remove duplicate definition --- content/library_handles.tex | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/content/library_handles.tex b/content/library_handles.tex index 50e28b59f..0f080cad7 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -15,25 +15,17 @@ %% \LibHandleDecl{SHMEM\_TEAM\_WORLD} & Handle of type \CTYPE{shmem\_team\_t} that corresponds to the -default team of all pes in the OpenSHMEM program. All point-to-point +default team of all \acp{PE} in the \openshmem program. All point-to-point communication operations and synchronizations that do not specify a team are performed on the default team. See Section~\ref{subsec:team} for more detail about its use. \tabularnewline \hline %% \LibHandleDecl{SHMEM\_TEAM\_NODE} & -Handle of type \CTYPE{shmem\_team\_t} that corresponds a team of pes +Handle of type \CTYPE{shmem\_team\_t} that corresponds a team of \acp{PE} which share node level resources, such as shared memory, network -interfaces, etc. When this handle is used by some pe, it will refer -to the node level team containing that pe. 
-See Section~\ref{subsec:team} for more detail about its use. -\tabularnewline \hline -%% -\LibHandleDecl{SHMEM\_TEAM\_WORLD} & -Handle of type \CTYPE{shmem\_team\_t} that corresponds to the -default team of all pes in the OpenSHMEM program. All point-to-point -communication operations and synchronizations that do not specify a team -are performed on the default team. +interfaces, etc. When this handle is used by some \ac{PE}, it will refer +to the node level team containing that \ac{PE}. See Section~\ref{subsec:team} for more detail about its use. \tabularnewline \hline %% From d14f99555b441eb4a2f21802fccc84f62a688e91 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 11:43:30 -0500 Subject: [PATCH 018/319] Add green highlighting to new constants and handles --- content/library_constants.tex | 4 ++++ content/library_handles.tex | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/content/library_constants.tex b/content/library_constants.tex index 26c99494b..f5fd29cba 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -39,13 +39,17 @@ See Section~\ref{subsec:thread_support} for more detail about its use. \tabularnewline \hline %% +\color{Green} \LibConstDecl{SHMEM\_TEAM\_NOCOLLECTIVE} & +\color{Green} The team creation option which specifies that the new team will not be initialized with support for team collective operations. See Section~\ref{subsec:team} for more detail about its use. \tabularnewline \hline %% +\color{Green} \LibConstDecl{SHMEM\_TEAM\_NULL} & +\color{Green} Predefined constant that can be compared against handles of type \CTYPE{shmem\_team\_t} to determine if they refer to a valid team. See Section~\ref{subsec:team} for more detail about its use. 
diff --git a/content/library_handles.tex b/content/library_handles.tex index 0f080cad7..ca7b8e606 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -13,7 +13,9 @@ \tabularnewline \hline \endhead %% +\color{Green} \LibHandleDecl{SHMEM\_TEAM\_WORLD} & +\color{Green} Handle of type \CTYPE{shmem\_team\_t} that corresponds to the default team of all \acp{PE} in the \openshmem program. All point-to-point communication operations and synchronizations that do not specify a team @@ -21,7 +23,9 @@ See Section~\ref{subsec:team} for more detail about its use. \tabularnewline \hline %% +\color{Green} \LibHandleDecl{SHMEM\_TEAM\_NODE} & +\color{Green} Handle of type \CTYPE{shmem\_team\_t} that corresponds a team of \acp{PE} which share node level resources, such as shared memory, network interfaces, etc. When this handle is used by some \ac{PE}, it will refer From abad3b280a85c50adeca6015f50657f0d788574c Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 12:02:00 -0500 Subject: [PATCH 019/319] Changed CONST to LibConstRef or LibHandleRef as appropriate --- content/shmem_team_destroy.tex | 6 +++--- content/shmem_team_my_pe.tex | 10 +++++----- content/shmem_team_n_pes.tex | 10 +++++----- content/shmem_team_split_3d.tex | 4 ++-- content/shmem_team_split_strided.tex | 4 ++-- content/shmem_team_translate.tex | 14 +++++++------- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 4f2b49dbd..8c27c4dc6 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -17,11 +17,11 @@ collective call, in which every member of the team being destroyed needs to participate. This will free all internal memory structures associated with the team and invalidate the team handle. 
Upon return, the team -handle is set to \CONST{SHMEM\_TEAM\_NULL}, after which it can no longer be +handle is set to \LibConstRef{SHMEM\_TEAM\_NULL}, after which it can no longer be used for team API calls. -It is considered erroneous to free \CONST{SHMEM\_TEAM\_WORLD} or -\CONST{SHMEM\_TEAM\_NODE}. Error checking will be done to ensure a valid +It is considered erroneous to free \LibHandleRef{SHMEM\_TEAM\_WORLD} or +\LibHandleRef{SHMEM\_TEAM\_NODE}. Error checking will be done to ensure a valid team handle is provided. All errors are considered fatal, and will result in the job aborting with an informative error message. } diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index 6265166be..a231277c6 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -16,7 +16,7 @@ The \FUNC{shmem\_team\_my\_pe} function returns the number of calling \ac{PE} within the provided team. The number will be a value between 0 and N-1, for a team of size N. Each member of the team has a unique number. -For the team \CONST{SHMEM\_TEAM\_WORLD}, this will return the same value +For the team \LibHandleRef{SHMEM\_TEAM\_WORLD}, this will return the same value as \FUNC{shmem\_my\_pe}. Error checking will be done to ensure a valid team handle is provided. @@ -31,10 +31,10 @@ \apinotes{ By default, \openshmem creates two predefined teams that will be available for use once the routine \FUNC{shmem\_init} has been called. These teams can be -referenced in the application by the handles \CONST{SHMEM\_TEAM\_WORLD} and -\CONST{SHMEM\_TEAM\_NODE}. Every PE process is a member of the \CONST{SHMEM\_TEAM\_WORLD} -team, and its number in \CONST{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. The \CONST{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} +referenced in the application by the handles \LibHandleRef{SHMEM\_TEAM\_WORLD} and +\LibHandleRef{SHMEM\_TEAM\_NODE}. 
Every PE process is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} +team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its +global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} that reside on the same node as the current \ac{PE}. } diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index 791958d83..992730f24 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -15,7 +15,7 @@ \apidescription{ The \FUNC{shmem\_team\_n\_pes} function returns the number of \acp{PE} in the team. This will always be a value between 1 and N, where N is the total number of -\acp{PE} accessible to the \openshmem program. For the team \CONST{SHMEM\_TEAM\_WORLD}, +\acp{PE} accessible to the \openshmem program. For the team \LibHandleRef{SHMEM\_TEAM\_WORLD}, this will return the same value as \FUNC{shmem\_n\_pes}. Every team must have a least one member. All \acp{PE} in the team @@ -33,10 +33,10 @@ \apinotes{ By default, \openshmem creates two predefined teams that will be available for use once the routine \FUNC{shmem\_init} has been called. These teams can be -referenced in the application by the constants \CONST{SHMEM\_TEAM\_WORLD} and -\CONST{SHMEM\_TEAM\_NODE}. Every \ac{PE}process is a member of the \CONST{SHMEM\_TEAM\_WORLD} -team, and its number in \CONST{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. The \CONST{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} +referenced in the application by the constants \LibHandleRef{SHMEM\_TEAM\_WORLD} and +\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE}process is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} +team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its +global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} that reside on the same node as the current PE. 
} diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_3d.tex index fac270017..be68e8dd2 100644 --- a/content/shmem_team_split_3d.tex +++ b/content/shmem_team_split_3d.tex @@ -51,7 +51,7 @@ A 2D Cartesian space may be defined by passing \CONST{0} for the zrange parameter. In this case, the zaxis\_team argument may be passed a NULL pointer. If it is not passed as a NULL pointer, the zaxis\_team will be returned -as \CONST{SHMEM\_TEAM\_NULL}. +as \LibConstRef{SHMEM\_TEAM\_NULL}. Each subset contains all \acp{PE} that are in the same dimension, along the X-axis, Y-axis and Z-axis. Within each subset, @@ -77,7 +77,7 @@ team functions and enable performance optimizations. When using a given team, the application must comply with the requirements of all options set on that team; otherwise, the behavior is undefined. -No options are enabled on \CONST{SHMEM\_TEAM\_WORLD} or \CONST{SHMEM\_TEAM\_NODE}. +No options are enabled on \LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE}. \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ The new team will not be created with the necessary support diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 45ca5ccac..ec9d3ed98 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -50,7 +50,7 @@ This function must be called by all processes contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the triplet specification, but for those processes a new\_team value of -\CONST{SHMEM\_TEAM\_NULL} is returned. All calling processes must provide the +\LibConstRef{SHMEM\_TEAM\_NULL} is returned. All calling processes must provide the same values for the \ac{PE} triplet. This function will return a new\_team containing the \ac{PE} subset specified by the triplet, and ordered by the existing global \ac{PE} number. 
None of the parameters need to reside in @@ -67,7 +67,7 @@ team functions and enable performance optimizations. When using a given team, the application must comply with the requirements of all options set on that team; otherwise, the behavior is undefined. -No options are enabled on \CONST{SHMEM\_TEAM\_WORLD} or \CONST{SHMEM\_TEAM\_NODE}. +No options are enabled on \LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE}. \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ The new team will not be created with the necessary support diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex index 7fff9009d..8719256bf 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate.tex @@ -22,11 +22,11 @@ \ac{PE}'s number in dest\_team. If src\_pe is not a member of dest\_team, a value of -1 is returned. -If \CONST{SHMEM\_TEAM\_WORLD} is provided as the dest\_team parameter, this function +If \LibHandleRef{SHMEM\_TEAM\_WORLD} is provided as the dest\_team parameter, this function acts as a global \ac{PE} number translator and will return the corresponding -\CONST{SHMEM\_TEAM\_WORLD} number. This may be useful when performing point-to- +\LibHandleRef{SHMEM\_TEAM\_WORLD} number. This may be useful when performing point-to- point operations between \acp{PE} in a subset, as point-to-point operations -that do not take a context argument require the global \CONST{SHMEM\_TEAM\_WORLD} +that do not take a context argument require the global \LibHandleRef{SHMEM\_TEAM\_WORLD} \ac{PE} number. Error checking will be done to ensure valid team handles are provided. @@ -41,10 +41,10 @@ \apinotes{ By default, \openshmem creates two predefined teams that will be available for use once the routine \FUNC{shmem\_init} has been called. These teams can be -referenced in the application by the constants \CONST{SHMEM\_TEAM\_WORLD} and -\CONST{SHMEM\_TEAM\_NODE}. 
Every \ac{PE}process is a member of the \CONST{SHMEM\_TEAM\_WORLD} -team, and its number in \CONST{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. The \CONST{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} +referenced in the application by the constants \LibHandleRef{SHMEM\_TEAM\_WORLD} and +\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE}process is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} +team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its +global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} that reside on the same node as the current PE. } From d95a0a844384c85b4291f5f03922ff2d36aad75b Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 27 Jun 2018 14:29:43 -0400 Subject: [PATCH 020/319] Add shmem_ctx_{get,set}_team and some intro text --- content/shmem_ctx_get_team.tex | 45 ++++++++++++++++++++++++++++++++++ content/shmem_ctx_set_team.tex | 45 ++++++++++++++++++++++++++++++++++ main_spec.tex | 24 ++++++++++++++++++ utils/defs.tex | 14 +++++++---- 4 files changed, 123 insertions(+), 5 deletions(-) create mode 100644 content/shmem_ctx_get_team.tex create mode 100644 content/shmem_ctx_set_team.tex diff --git a/content/shmem_ctx_get_team.tex b/content/shmem_ctx_get_team.tex new file mode 100644 index 000000000..2ba10a0c6 --- /dev/null +++ b/content/shmem_ctx_get_team.tex @@ -0,0 +1,45 @@ +\apisummary{ + Retrieve the team associated with the communication context. +} + +\begin{apidefinition} + + \begin{Csynopsis} +int @\FuncDecl{shmem\_ctx\_get\_team}@(shmem_ctx_t ctx, shmem_team_t *team); + \end{Csynopsis} + + \begin{apiarguments} + + \apiargument{IN}{ctx}{ + A handle to a communication context. + } + + \apiargument{OUT}{team}{ + A pointer to a handle to the associated \ac{PE} team. 
+ } + + \end{apiarguments} + + \apidescription{ + The \FUNC{shmem\_ctx\_get\_team} routine returns a handle to the \ac{PE} + team associated with the specified communication context \VAR{ctx}. + The team handle is returned through the pointer argument \VAR{team}. + + If \VAR{ctx} is the default context, the returned team is guaranteed + to be \CONST{SHMEM\_TEAM\_WORLD}. + + If \VAR{ctx} is an invalid context, the argument \VAR{team} is not + modified and a value of \CONST{-1} is returned. + + If \VAR{team} is a null pointer, a value of \CONST{-1} is returned. + } + + \apireturnvalues{ + Zero on success; otherwise, \CONST{-1}. + } + + \apinotes{ + None. + } + +\end{apidefinition} diff --git a/content/shmem_ctx_set_team.tex b/content/shmem_ctx_set_team.tex new file mode 100644 index 000000000..54cdd7124 --- /dev/null +++ b/content/shmem_ctx_set_team.tex @@ -0,0 +1,45 @@ +\apisummary{ + Update the team associated with the communication context. +} + +\begin{apidefinition} + + \begin{Csynopsis} +int @\FuncDecl{shmem\_ctx\_set\_team}@(shmem_ctx_t ctx, shmem_team_t team); + \end{Csynopsis} + + \begin{apiarguments} + + \apiargument{IN}{ctx}{ + A handle to a communication context. + } + + \apiargument{IN}{team}{ + A handle to the specified \ac{PE} team. + } + + \end{apiarguments} + + \apidescription{ + The \FUNC{shmem\_ctx\_set\_team} routine associates the \ac{PE} team + identified by the handle \VAR{team} with the communication context + specified by the handle \VAR{ctx}. + All subsequent \openshmem operations performed on the specified context + will operate with respect to the updated \ac{PE} team. + + If \VAR{ctx} is a handle to the default context or + \VAR{team} is equal to the constant \CONST{SHMEM\_TEAM\_NULL}, then + the specified context is not updated and a value of \CONST{-1} is returned. + + If \VAR{ctx} is an invalid context, a value of \CONST{-1} is returned. + } + + \apireturnvalues{ + Zero on success; otherwise, \CONST{-1}. + } + + \apinotes{ + None. 
+ } + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index 01d710835..0d2e58e6d 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -135,6 +135,18 @@ \subsection{Communication Management Routines} default context. The default context can be used explicitly through the \LibHandleRef{SHMEM\_CTX\_DEFAULT} handle. +\newtext{ +Every communication context has an associated \ac{PE} team. +This \ac{PE} team specifies the set of \acp{PE} over which \ac{PE}-specific +routines that operate on a communication context, explicitly or implicitly, +(e.g., \ac{RMA} and \ac{AMO} routines) may be performed. +The default context has a fixed association with the world team, +\LibHandleRef{SHMEM\_TEAM\_WORLD}. +Communication contexts created by \FUNC{shmem\_ctx\_create} have an initial +association with the world team, which may be updated by the +\FUNC{shmem\_ctx\_set\_team} routine. +} + \subsubsection{\textbf{SHMEM\_CTX\_CREATE}} \label{subsec:shmem_ctx_create} \input{content/shmem_ctx_create.tex} @@ -143,6 +155,18 @@ \subsubsection{\textbf{SHMEM\_CTX\_DESTROY}} \label{subsec:shmem_ctx_destroy} \input{content/shmem_ctx_destroy.tex} +\newtext{ +\subsubsection{\textbf{SHMEM\_CTX\_SET\_TEAM}} +\label{subsec:shmem_ctx_set_team} +\input{content/shmem_ctx_set_team.tex} +} + +\newtext{ +\subsubsection{\textbf{SHMEM\_CTX\_GET\_TEAM}} +\label{subsec:shmem_ctx_get_team} +\input{content/shmem_ctx_get_team.tex} +} + \subsection{Remote Memory Access Routines}\label{sec:rma} \input{content/rma_intro.tex} diff --git a/utils/defs.tex b/utils/defs.tex index dbfdffdcf..86da9273b 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -408,14 +408,16 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, + shmem_team_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} { \lstset{language={C}, 
backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + shmem_team_t}, aboveskip=0pt, belowskip=0pt}}{} @@ -424,7 +426,8 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + shmem_team_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} @@ -433,7 +436,8 @@ \color{red} {\lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + shmem_team_t}, aboveskip=0pt, belowskip=0pt}}}{} \lstnewenvironment{Fsynopsis} @@ -516,7 +520,7 @@ ##1 \lstinputlisting[language={C}, tabsize=2, basicstyle=\ttfamily\footnotesize, - morekeywords={size_t, ptrdiff_t, shmem_ctx_t}]{##2} + morekeywords={size_t, ptrdiff_t, shmem_ctx_t, shmem_team_t}]{##2} ##3 } \newcommand{\apifexample}[3]{ ##1 From 0bf8ba483614e0e7105806a51cba02076364b9d9 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 14:33:40 -0500 Subject: [PATCH 021/319] Fixed formatting and softened error condition on team destroy --- content/shmem_team_destroy.tex | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 8c27c4dc6..c8dfd758b 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -9,11 +9,11 @@ \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team}{A valid SHMEM team handle.} +\apiargument{IN}{team}{A valid \openshmem team handle.} \end{apiarguments} \apidescription{ -The shmem\_team\_destroy function destroys an existing team. 
This is a +The \FUNC{shmem\_team\_destroy} function destroys an existing team. This is a collective call, in which every member of the team being destroyed needs to participate. This will free all internal memory structures associated with the team and invalidate the team handle. Upon return, the team @@ -22,16 +22,16 @@ It is considered erroneous to free \LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE}. Error checking will be done to ensure a valid -team handle is provided. All errors are considered fatal, and will -result in the job aborting with an informative error message. +team handle is provided. Errors will result in a return value less than \CONST{0}. } \apireturnvalues{ -None. +On success, the function will return 0. Otherwise a value less than +\CONST{0} will be returned. } \apinotes{ -Note that SHMEM team handles have local semantics only. That is, team +Note that \openshmem team handles have local semantics only. That is, team handles should not be stored in shared variables and used across other processes. Doing so will result in unpredictable behavior. } From ffd549c27f53c04c6900fac6f53b355d87d479a1 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 14:34:36 -0500 Subject: [PATCH 022/319] Fixed formatting and softened error condition on team_my_pe --- content/shmem_team_my_pe.tex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index a231277c6..81c4a7ad9 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -1,5 +1,5 @@ \apisummary{ - Returns the number of calling \ac{PE} within the provided team. + Returns the number of the calling \ac{PE} within the provided team. 
} \begin{apidefinition} @@ -9,7 +9,7 @@ \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team}{A valid SHMEM team handle.} +\apiargument{IN}{team}{A valid \openshmem team handle.} \end{apiarguments} \apidescription{ @@ -20,22 +20,22 @@ as \FUNC{shmem\_my\_pe}. Error checking will be done to ensure a valid team handle is provided. -All errors are considered fatal, and will result in the job aborting -with an informative error message. +Errors will result in a return value less than \CONST{0}. } \apireturnvalues{ -The number of the calling \ac{PE} within the provided team. +The number of the calling \ac{PE} within the provided team, or a value less than +\CONST{0} if the team handle is invalid. } \apinotes{ By default, \openshmem creates two predefined teams that will be available for use once the routine \FUNC{shmem\_init} has been called. These teams can be referenced in the application by the handles \LibHandleRef{SHMEM\_TEAM\_WORLD} and -\LibHandleRef{SHMEM\_TEAM\_NODE}. Every PE process is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} +\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE} is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} -that reside on the same node as the current \ac{PE}. +global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains the set of only those +\acp{PE} that reside on the same node as the current \ac{PE}. 
} \end{apidefinition} From 88449a1d2317db3328892e2362cfd030306e5870 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 14:37:34 -0500 Subject: [PATCH 023/319] Fix formatting on team_split_3d --- content/shmem_team_split_3d.tex | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_3d.tex index be68e8dd2..bc7523878 100644 --- a/content/shmem_team_split_3d.tex +++ b/content/shmem_team_split_3d.tex @@ -13,9 +13,9 @@ \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{parent\_team}{A valid SHMEM team. The predefined teams -SHMEM\_TEAM\_WORLD or SHMEM\_TEAM\_NODE may be used, or any team created by the -user.} +\apiargument{IN}{parent\_team}{A valid \openshmem team. The predefined teams +\LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE} may be +used, or any team created by the user.} \apiargument{IN}{options}{The set of options requested for the new teams. Multiple options may be requested by combining them with a bitwise OR operation; @@ -43,14 +43,14 @@ \end{apiarguments} \apidescription{ -The shmem\_team\_split\_3d routine is a collective routine. It creates up to three new teams +The \FUNC{shmem\_team\_split\_3d} routine is a collective routine. It creates up to three new teams by splitting an existing parent team into up to three subsets based on a -1D, 2D or 3D Cartesian space defined by the triplet (xrange, yrange, and zrange) -describing the size of the Cartesian space in X, Y, and Z dimensions. +1D, 2D or 3D Cartesian space defined by the triplet (\VAR{xrange}, \VAR{yrange}, +and \VAR{zrange}) describing the size of the Cartesian space in X, Y, and Z dimensions. -A 2D Cartesian space may be defined by passing \CONST{0} for the zrange -parameter. In this case, the zaxis\_team argument may be passed a NULL -pointer. 
If it is not passed as a NULL pointer, the zaxis\_team will be returned +A 2D Cartesian space may be defined by passing \CONST{0} for the \VAR{zrange} +parameter. In this case, the zaxis team argument may be passed a null +pointer. If it is not passed as a null pointer, the zaxis team will be returned as \LibConstRef{SHMEM\_TEAM\_NULL}. Each subset contains all \acp{PE} that are in the same @@ -64,12 +64,12 @@ equal to the number of discrete (x,y,z) points in the Cartesian space. None of the parameters need to reside in symmetric memory. -In the event that the Cartesian space contains less points than exist \acp{PE} -in the parent\_team, the first N \acp{PE} in the parent team will be used +In the event that the Cartesian space contains fewer points than exist \acp{PE} +in the parent team, the first N \acp{PE} in the parent team will be used to form the new teams, where N is equal to the number of points in the Cartesian space. -Error checking will be done to ensure a valid team handle is provided. +Error checking will be done to ensure a valid parent team handle is provided. All errors are considered fatal and will result in the job aborting with an informative error message. From 62bd69341c5c57f7877874a3dce34147e7888bc3 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 14:38:11 -0500 Subject: [PATCH 024/319] Fix formatting and soften error condition on shmem_team_translate --- content/shmem_team_translate.tex | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex index 8719256bf..d45d5111e 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate.tex @@ -18,11 +18,11 @@ \apidescription{ The \FUNC{shmem\_team\_translate\_pe} function will translate a given \ac{PE} number to the corresponding \ac{PE} number in another team. 
-Specifically, given the src\_pe in src\_team, this function returns that -\ac{PE}'s number in dest\_team. If src\_pe is not a member of -dest\_team, a value of -1 is returned. +Specifically, given the \VAR{src\_pe} in \VAR{src\_team}, this function returns that +\ac{PE}'s number in \VAR{dest\_team}. If \VAR{src\_pe} is not a member of both the +\VAR{src\_team} and \VAR{dest\_team}, a value less than \CONST{0} is returned. -If \LibHandleRef{SHMEM\_TEAM\_WORLD} is provided as the dest\_team parameter, this function +If \LibHandleRef{SHMEM\_TEAM\_WORLD} is provided as the \VAR{dest\_team} parameter, this function acts as a global \ac{PE} number translator and will return the corresponding \LibHandleRef{SHMEM\_TEAM\_WORLD} number. This may be useful when performing point-to- point operations between \acp{PE} in a subset, as point-to-point operations @@ -30,22 +30,22 @@ \ac{PE} number. Error checking will be done to ensure valid team handles are provided. -All team handle errors are considered fatal and will result in the job -aborting with an informative error message. +Errors will result in a return value less than \CONST{0}. } \apireturnvalues{ -The specified \ac{PE}'s number in the dest\_team. +The specified \ac{PE}'s number in the \VAR{dest\_team}, or a value less than \CONST{0} if any +team handle arguments are invalid or the \VAR{src\_pe} is not in both the source and destination teams. } \apinotes{ By default, \openshmem creates two predefined teams that will be available for use once the routine \FUNC{shmem\_init} has been called. These teams can be referenced in the application by the constants \LibHandleRef{SHMEM\_TEAM\_WORLD} and -\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE}process is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} +\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE} is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. 
The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} -that reside on the same node as the current PE. +global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains the set of only those +\acp{PE} that reside on the same node as the current \ac{PE}. } \end{apidefinition} From cb738bef73a6252239532da8496ce4ff7639f7ed Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 14:39:04 -0500 Subject: [PATCH 025/319] Fix formatting on team_split_strided --- content/shmem_team_split_strided.tex | 39 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index ec9d3ed98..b8914a61f 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -1,7 +1,7 @@ \apisummary{ -Create a new SHMEM team from a subset of the existing parent team \acp{PE}, +Create a new \openshmem team from a subset of the existing parent team \acp{PE}, where the subset is defined by the -\ac{PE} triplet (PE\_start, PE\_stride, and PE\_size) supplied to the function.} +\ac{PE} triplet (\VAR{PE\_start}, \VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the function.} \begin{apidefinition} @@ -12,55 +12,54 @@ \begin{apiarguments} \apiargument{IN}{parent\_team}{A valid SHMEM team. The predefined teams -SHMEM\_TEAM\_WORLD or SHMEM\_TEAM\_NODE may be used, or any team created by the user.} +\LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE} may +be used, or any team created by the user.} \apiargument{IN}{options}{The set of options requested for the new team.
Multiple options may be requested by combining them with a bitwise OR operation; otherwise, \CONST{0} can be given if no options are requested.} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the subset of \acp{PE} from -the parent\_team that will form the new\_team} +the parent team that will form the new team} \apiargument{IN}{PE\_stride}{The stride between team \ac{PE} -numbers in the parent\_team that comprise the subset of \acp{PE} that will form +numbers in the parent team that comprise the subset of \acp{PE} that will form the new team.} -\apiargument{IN}{PE\_size}{The number of \acp{PE} from the parent\_team in the subset -of \acp{PE} that will form the new\_team.} +\apiargument{IN}{PE\_size}{The number of \acp{PE} from the parent team in the subset +of \acp{PE} that will form the new team.} -\apiargument{OUT}{new\_team}{A new SHMEM team handle, representing a \ac{PE} -subset of all the \acp{PE} in the parent\_team that is created from +\apiargument{OUT}{new\_team}{A new \openshmem team handle, representing a \ac{PE} +subset of all the \acp{PE} in the parent team that is created from the \ac{PE} triplet provided.} \end{apiarguments} \apidescription{ The \FUNC{shmem\_team\_split\_strided} function is a collective routine. -It creates a new SHMEM team from a subset of the existing parent team \acp{PE}, -where the subset is defined by the \ac{PE} triplet (PE\_start, PE\_stride, and PE\_size) -supplied to the function. +It creates a new \openshmem team from a subset of the existing parent team, +where the subset is defined by the \ac{PE} triplet (\VAR{PE\_start}, +\VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the function. It is important to note the use of the less restrictive -PE\_stride argument instead of logPE\_stride. This method of +\VAR{PE\_stride} argument instead of \VAR{logPE\_stride}. 
This method of creating a team with an arbitrary set of \acp{PE} is inherently restricted by its parameters, but allows for many additional use-cases over using a -logPE\_stride parameter, and may provide an easier transition for +\VAR{logPE\_stride} parameter, and may provide an easier transition for existing \openshmem programs to create and use \openshmem teams. This function must be called by all processes contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the -triplet specification, but for those processes a new\_team value of +triplet specification, but for those processes a \VAR{new\_team} value of \LibConstRef{SHMEM\_TEAM\_NULL} is returned. All calling processes must provide the -same values for the \ac{PE} triplet. This function will return a new\_team +same values for the \ac{PE} triplet. This function will return a \VAR{new\_team} containing the \ac{PE} subset specified by the triplet, and ordered by the existing global \ac{PE} number. None of the parameters need to reside in symmetric memory. Error checking will be done to ensure a valid \ac{PE} triplet is provided, and also to determine whether a valid team handle is provided for the -parent\_team. - -All errors are considered fatal and will result in the job aborting with +parent team. All errors are considered fatal and will result in the job aborting with an informative error message. 
The following options can be supplied during team split to restrict From 04eea2c8cfdf7a079db0ad8a467dcc2a3d3595c2 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 14:39:34 -0500 Subject: [PATCH 026/319] Fix formatting and soften error condition on team_n_pes --- content/shmem_team_n_pes.tex | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index 992730f24..6695f09ce 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -9,35 +9,36 @@ \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team}{A valid SHMEM team handle.} +\apiargument{IN}{team}{A valid \openshmem team handle.} \end{apiarguments} \apidescription{ The \FUNC{shmem\_team\_n\_pes} function returns the number of \acp{PE} in the team. This will always be a value between 1 and N, where N is the total number of -\acp{PE} accessible to the \openshmem program. For the team \LibHandleRef{SHMEM\_TEAM\_WORLD}, -this will return the same value as \FUNC{shmem\_n\_pes}. +\acp{PE} accessible to the \openshmem program. For the team +\LibHandleRef{SHMEM\_TEAM\_WORLD}, this will return the same value as +\FUNC{shmem\_n\_pes}. Every team must have a least one member. All \acp{PE} in the team will get back the same value for the team size. Error checking will be done to ensure a valid team handle is provided. -All errors are considered fatal and will result in the job aborting -with an informative error message. +Errors will result in a return value less than \CONST{0}. } \apireturnvalues{ -Total number of \acp{PE} in the provided team. +Total number of \acp{PE} in the provided team, or a value less than +\CONST{0} if the team handle is invalid. } \apinotes{ By default, \openshmem creates two predefined teams that will be available for use once the routine \FUNC{shmem\_init} has been called. 
These teams can be referenced in the application by the constants \LibHandleRef{SHMEM\_TEAM\_WORLD} and -\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE}process is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} +\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE} is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains only the set of \acp{PE} -that reside on the same node as the current PE. +global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains the set of only those +\acp{PE} that reside on the same node as the current \ac{PE}. } \end{apidefinition} From 231592b07f5355b280505d32561a6dc39c86d422 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 15:36:24 -0500 Subject: [PATCH 027/319] Fix formatting and soften error condition for team_get_options --- content/shmem_team_get_options.tex | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/content/shmem_team_get_options.tex b/content/shmem_team_get_options.tex index b878da372..3e497b295 100644 --- a/content/shmem_team_get_options.tex +++ b/content/shmem_team_get_options.tex @@ -1,5 +1,5 @@ \apisummary{ - shmem\_team\_get\_options returns the options flags describing a given team + Return the options flags describing the options applied to a given team } \begin{apidefinition} @@ -9,31 +9,34 @@ \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team}{A valid SHMEM team handle.} +\apiargument{IN}{team}{A valid \openshmem team handle.} \end{apiarguments} \apidescription{ -\FUNC{shmem\_team\_get\_options} returns a long unsigned value containing +\FUNC{shmem\_team\_get\_options} returns a \CTYPE{long} value containing all of the options which describe the given team. Options are requested when new teams are created in the various \FUNC{shmem\_team\_split\_*} functions. 
-Whichever of the requested options are applied to the team by the library +All of the requested options applied to the team by the library implementation will be returned by \FUNC{shmem\_team\_get\_options}. -All processes in the team will get back the same value for the team options. +A library implementation will not apply any non-default options to a team, +other than those requested during team split functions. + +All \acp{PE} in the team will get back the same value for the team options. Error checking will be done to ensure a valid team handle is provided. -All errors are considered fatal and will result in the job aborting -with an informative error message. +Errors will result in a return value less than \CONST{0}. } \apireturnvalues{ The set of options applied to the given team. Multiple options are combined with a bitwise OR and can be extracted with a bitwise AND. A return value of -\CONST{0} implies that the team uses all default options. +\CONST{0} implies that the team uses only default options. A return value less than +\CONST{0} implies that the team handle is invalid. } \apinotes{ -A use case for this function is to determine if a given team will +A use case for this function is to determine whether a given team can support collective operations by testing for the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option. When teams are created without support for collectives, they may still use point to point operations to communicate and synchronize. So programmers may wish From 1e4e36c031cf060f395c26a6cde277fbf6b423e8 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 27 Jun 2018 16:50:11 -0400 Subject: [PATCH 028/319] Minor LaTeX updates for feedback block (based on deprecation cmds) --- utils/defs.tex | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/utils/defs.tex b/utils/defs.tex index dbfdffdcf..e355a1c72 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -360,7 +360,6 @@ {\strikeline\mbox{} \DeprecationStart \stretchline\mbox{}}} \newcommand{\EndDeprecateBlock}{% \mbox{}\stretchline\mbox{} \DeprecationEnd \strikeline} - \newenvironment{DeprecateBlock}{% \par \StartDeprecateBlock \par}{\par \EndDeprecateBlock \par} @@ -370,16 +369,25 @@ \strikeline\mbox{} \DeprecationEnd \strikeline} \newenvironment{DeprecateInline}{\StartInlineDeprecate}{\EndInlineDeprecate} +\newcommand{\deprecationstart}{% + \color{red} \strikeline\mbox{} deprecation start \stretchline \mbox{}} +\newcommand{\deprecationend}{% + \mbox{}\stretchline\mbox{} \color{red} deprecation end \strikeline} +\newenvironment{deprecate}{\deprecationstart \\}{\\ \deprecationend} + % -% Library API description template commands +% Design feedback request helpers % -\newcommand{\deprecationstart}{\color{red} \raisebox{.5ex}{\rule{1em}{.4pt}} - deprecation start \xrfill[.5ex]{.4pt}[red] \mbox{}} -\newcommand{\deprecationend}{\mbox{}\xrfill[.5ex]{.4pt}[red]\mbox{} \color{red} - deprecation end \raisebox{.5ex}{\rule{1em}{.4pt}}} +\newcommand{\feedbackstart}{\color{RoyalBlue} \strikeline[RoyalBlue] + design feedback requested \stretchline[RoyalBlue] \mbox{}} +\newcommand{\feedbackend}{\mbox{}\stretchline[RoyalBlue]\mbox{}} -\newenvironment{deprecate}{\deprecationstart \\}{\\ \deprecationend} +\newenvironment{FeedbackRequest}{\feedbackstart \\}{\\ \feedbackend} + +% +% Library API description template commands +% \newcommand{\apisummary}[1]{ #1 From 687620e04d0116394ccfadd99a623faeaab3f06f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 15:51:56 -0500 Subject: [PATCH 029/319] Fix formatting and fix 
erroneous error condition on team_broadcast --- content/shmem_team_broadcast.tex | 36 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/content/shmem_team_broadcast.tex b/content/shmem_team_broadcast.tex index aeca874ea..364de4198 100644 --- a/content/shmem_team_broadcast.tex +++ b/content/shmem_team_broadcast.tex @@ -12,17 +12,19 @@ \begin{apiarguments} -\apiargument{IN}{team}{A valid SHMEM team handle to a team which has been created with support for collective operations.} -\apiargument{OUT}{dest}{A symmetric data object.} +\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been created without + disabling support for collective operations.} +\apiargument{OUT}{dest}{A symmetric data object. See the table below in this description + for allowable types.} \apiargument{IN}{source}{A symmetric data object that can be of any data type that is permissible for the \dest{} argument.} -\apiargument{IN}{nelems}{The number of elements in \source. For +\apiargument{IN}{nelems}{The number of elements in \source{}. For \FUNC{shmem\_team\_broadcast32}, this is the number of - 32-bit halfwords. nelems must be of type \VAR{size\_t} in \Cstd.} + 32-bit halfwords. nelems must be of type \CTYPE{size\_t} in \Cstd.} \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to the team, from which the data is copied. Must be greater than or equal to - 0 and less than the result of calling \FUNC{shmem\_team\_n\_pes(team)}. - \VAR{PE\_root} must be of type integer.} + 0 and less than N, where N is the result of calling \FUNC{shmem\_team\_n\_pes(team)}. + \VAR{PE\_root} must be of type \CTYPE{int}.} \end{apiarguments} @@ -32,10 +34,6 @@ and store the values at \dest{} on the other \acp{PE} that are members of the team. The data is not copied to the \dest{} area on the root \ac{PE}.
- As with all \openshmem team collective routines, each of these routines assumes that - only \acp{PE} in the given team call the routine. If a \ac{PE} not in the - team calls an \openshmem team collective routine, the behavior is undefined. - If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, it will not have the required support structures to complete this routine. If such a team is passed to this or any other team collective routine, the behavior @@ -44,7 +42,7 @@ As with all \openshmem routines where the operation occurs for a given team - either when the team is an argument to the routine, or when the team is an attribute of the context argument to a routine - the \ac{PE} numbers are relative to the team, - and must be in the range of 0 to the result of \FUNC{shmem\_team\_n\_pes(team)}. + and must be in the range of 0 to N-1, where N is the result of \FUNC{shmem\_team\_n\_pes(team)}. The values of the argument \VAR{PE\_root} must be the same value on all \acp{PE} in the team. The same \dest{} and \source{} data objects must be passed by all \acp{PE} @@ -57,6 +55,10 @@ the \dest{} data object is updated. \item The \source{} data object may be safely reused. \end{itemize} + + Error checking will be done to ensure a valid team handle is provided. + All errors are considered fatal and will result in the job aborting with + an informative error message. } \apidesctable{ @@ -64,12 +66,12 @@ constraints, which are as follows: }{Routine}{Data type of \VAR{dest} and \VAR{source}} -\apitablerow{shmem\_broadcast64}{Any noncharacter - type that has an element size of \CONST{64} bits. No - \CorCpp{} structures are allowed.} -\apitablerow{shmem\_broadcast32}{Any noncharacter - type that has an element size of \CONST{32} bits. No - \CorCpp{} structures are allowed.} +\apitablerow{shmem\_team\_broadcast64}{Any noncharacter + type that has an element size of \CONST{64} bits. 
+ \CorCpp{} structures are NOT allowed.} +\apitablerow{shmem\_team\_broadcast32}{Any noncharacter + type that has an element size of \CONST{32} bits. + \CorCpp{} structures are NOT allowed.} \apireturnvalues{ None. From 8f9720c78c38d216269c94d4df21fc656bb035c0 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 16:14:05 -0500 Subject: [PATCH 030/319] Added feedback request blocks for error return values --- content/shmem_team_destroy.tex | 2 ++ content/shmem_team_my_pe.tex | 2 ++ content/shmem_team_n_pes.tex | 2 ++ content/shmem_team_split_3d.tex | 3 +++ content/shmem_team_split_strided.tex | 8 ++++++-- content/shmem_team_translate.tex | 2 ++ 6 files changed, 17 insertions(+), 2 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index c8dfd758b..b63314af8 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -25,10 +25,12 @@ team handle is provided. Errors will result in a return value less than \CONST{0}. } +\begin{FeedbackRequest} \apireturnvalues{ On success, the function will return 0. Otherwise a value less than \CONST{0} will be returned. } +\end{FeedbackRequest} \apinotes{ Note that \openshmem team handles have local semantics only. That is, team diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index 81c4a7ad9..9b79355ba 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -23,10 +23,12 @@ Errors will result in a return value less than \CONST{0}. } +\begin{FeedbackRequest} \apireturnvalues{ The number of the calling \ac{PE} within the provided team, or a value less than \CONST{0} if the team handle is invalid. 
} +\end{FeedbackRequest} \apinotes{ By default, \openshmem creates two predefined teams that will be available diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index 6695f09ce..1b1bcf2a2 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -26,10 +26,12 @@ Errors will result in a return value less than \CONST{0}. } +\begin{FeedbackRequest} \apireturnvalues{ Total number of \acp{PE} in the provided team, or a value less than \CONST{0} if the team handle is invalid. } +\end{FeedbackRequest} \apinotes{ By default, \openshmem creates two predefined teams that will be available diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_3d.tex index bc7523878..0fd86b29d 100644 --- a/content/shmem_team_split_3d.tex +++ b/content/shmem_team_split_3d.tex @@ -70,8 +70,11 @@ Cartesian space. Error checking will be done to ensure a valid parent team handle is provided. + +\begin{FeedbackRequest} All errors are considered fatal and will result in the job aborting with an informative error message. +\end{FeedbackRequest} The following options can be supplied during team split to restrict team functions and enable performance optimizations. When using a given diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index b8914a61f..c785e8156 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -12,7 +12,7 @@ \begin{apiarguments} \apiargument{IN}{parent\_team}{A valid SHMEM team. The predefined teams -\LibHandleRef{SHMEM\_TEAM\_WORLD} or \libHandleRef{SHMEM\_TEAM\_NODE} may +\LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE} may be used, or any team created by the user.} \apiargument{IN}{options}{The set of options requested for the new team. @@ -59,8 +59,12 @@ Error checking will be done to ensure a valid \ac{PE} triplet is provided, and also to determine whether a valid team handle is provided for the -parent team. 
All errors are considered fatal and will result in the job aborting with +parent team. + +\begin{FeedbackRequest} +All errors are considered fatal and will result in the job aborting with an informative error message. +\end{FeedbackRequest} The following options can be supplied during team split to restrict team functions and enable performance optimizations. When using a given diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex index d45d5111e..1e022040f 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate.tex @@ -33,10 +33,12 @@ Errors will result in a return value less than \CONST{0}. } +\begin{FeedbackRequest} \apireturnvalues{ The specified \ac{PE}'s number in the \VAR{dest\_team}, or a value less than \CONST{0} if any team handle arguments are invalid or the \VAR{src\_pe} is not in both the source and destination teams. } +\end{FeedbackRequest} \apinotes{ By default, \openshmem creates two predefined teams that will be available From 237e712915b0bb4b0aef264d77a7b5919390a95a Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 27 Jun 2018 16:20:26 -0500 Subject: [PATCH 031/319] Add request for feedback on option setting requirements --- content/shmem_team_get_options.tex | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/content/shmem_team_get_options.tex b/content/shmem_team_get_options.tex index 3e497b295..2aac48f2f 100644 --- a/content/shmem_team_get_options.tex +++ b/content/shmem_team_get_options.tex @@ -22,6 +22,15 @@ A library implementation will not apply any non-default options to a team, other than those requested during team split functions. +\begin{FeedbackRequest} +A library implementation must apply all requested options to a team, even in +the event that the library does not make optimizations based on these options. 
+For example, suppose a library implementation must always create teams with the same +overhead, no matter if the program disables collective support during team creation. +The library must still enable the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option +when it is requested, so that the \openshmem program will be portable across implementations. +\end{FeedbackRequest} + All \acp{PE} in the team will get back the same value for the team options. Error checking will be done to ensure a valid team handle is provided. From ec84225673331a1469852405ec6d17ffdb3799c9 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Thu, 28 Jun 2018 09:50:56 -0400 Subject: [PATCH 032/319] Clarify team-context association --- content/shmem_ctx_create.tex | 9 +++++++++ content/shmem_ctx_destroy.tex | 5 +++++ content/shmem_ctx_get_team.tex | 7 +++++++ content/shmem_ctx_set_team.tex | 7 +++++++ main_spec.tex | 10 +++++++--- 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/content/shmem_ctx_create.tex b/content/shmem_ctx_create.tex index 7211b4547..c83667888 100644 --- a/content/shmem_ctx_create.tex +++ b/content/shmem_ctx_create.tex @@ -25,6 +25,15 @@ in a correct state. The creation call can be reattempted with different options or after additional resources become available. + \newtext{ + A newly created communication context has an initial association with the + default team. + All \openshmem routines that operate on this context will do so with + respect to the associated \ac{PE} team. + That is, all point-to-point routines operating on this context will use + team-relative \ac{PE} numbering. + } + By default, contexts are {\em shareable} and, when it is allowed by the threading model provided by the \openshmem library, they can be used concurrently by multiple threads within the PE where they were created.
diff --git a/content/shmem_ctx_destroy.tex b/content/shmem_ctx_destroy.tex index 287c5bcdd..3a176712c 100644 --- a/content/shmem_ctx_destroy.tex +++ b/content/shmem_ctx_destroy.tex @@ -18,6 +18,11 @@ the context is not used after it has been destroyed, for example when the destroyed context is used by multiple threads. This function performs an implicit quiet operation on the given context before it is freed. + + \newtext{ + When a context is destroyed, the team associated with this context + is not affected. + } } \apireturnvalues{ diff --git a/content/shmem_ctx_get_team.tex b/content/shmem_ctx_get_team.tex index 2ba10a0c6..0a8456d6d 100644 --- a/content/shmem_ctx_get_team.tex +++ b/content/shmem_ctx_get_team.tex @@ -36,6 +36,13 @@ \apireturnvalues{ Zero on success; otherwise, \CONST{-1}. + + \begin{FeedbackRequest} + Should this routine return nonzero, -1, or negative values + (e.g., to allow for implementation-defined error codes) on error? + Will slowing down the critical path of this routine by adding + input checking adversely affect its use? + \end{FeedbackRequest} } \apinotes{ diff --git a/content/shmem_ctx_set_team.tex b/content/shmem_ctx_set_team.tex index 54cdd7124..c73804221 100644 --- a/content/shmem_ctx_set_team.tex +++ b/content/shmem_ctx_set_team.tex @@ -36,6 +36,13 @@ \apireturnvalues{ Zero on success; otherwise, \CONST{-1}. + + \begin{FeedbackRequest} + Should this routine return nonzero, -1, or negative values + (e.g., to allow for implementation-defined error codes) on error? + Will slowing down the critical path of this routine by adding + input checking adversely affect its use? 
+ \end{FeedbackRequest} } \apinotes{ diff --git a/main_spec.tex b/main_spec.tex index 0d2e58e6d..53c916a16 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -140,10 +140,14 @@ \subsection{Communication Management Routines} This \ac{PE} team specifies the set of \acp{PE} over which \ac{PE}-specific routines that operate on a communication context, explicitly or implicitly, (e.g., \ac{RMA} and \ac{AMO} routines) may be performed. -The default context has a fixed association with the world team, -\LibHandleRef{SHMEM\_TEAM\_WORLD}. +All \openshmem routines that operate on this context will do so with respect +to the team-relative \ac{PE} numbering of the associated \ac{PE} team. +} + +\newtext{ +The default context has a fixed association with the default team. Communication contexts created by \FUNC{shmem\_ctx\_create} have an initial -association with the world team, which may be updated by the +association with the default team, which may be updated by the \FUNC{shmem\_ctx\_set\_team} routine. } From ebf0ccbccfdfc7c6f839142599945bb4d34eaeb3 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Thu, 28 Jun 2018 10:07:29 -0400 Subject: [PATCH 033/319] Initial draft of changes for barrier/sync on contexts/teams --- content/shmem_barrier.tex | 65 +++++++++++++++++++++++++++++++++------ content/shmem_sync.tex | 42 ++++++++++++++++++++++--- 2 files changed, 93 insertions(+), 14 deletions(-) diff --git a/content/shmem_barrier.tex b/content/shmem_barrier.tex index c5f7adea5..9c0dd8100 100644 --- a/content/shmem_barrier.tex +++ b/content/shmem_barrier.tex @@ -1,13 +1,27 @@ \apisummary{ Performs all operations described in the \FUNC{shmem\_barrier\_all} interface - but with respect to a subset of \acp{PE} defined by the active set. + but with respect to \newtext{a specified \ac{PE} team or} + a subset of \acp{PE} defined by \oldtext{the} \newtext{an} active set \newtext{triple}. 
} \begin{apidefinition} +{\color{ForestGreen} +\begin{C11synopsis} +void @\FuncDecl{shmem\_barrier}@(shmem_ctx_t ctx); +\end{C11synopsis} +} + +% These separate synopses should be merged when the color highlighting removed, +% preferably with shmem_ctx_barrier listed first. \begin{Csynopsis} void @\FuncDecl{shmem\_barrier}@(int PE_start, int logPE_stride, int PE_size, long *pSync); \end{Csynopsis} +{\color{ForestGreen} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_ctx\_barrier}@(shmem_ctx_t ctx); +\end{CsynopsisCol} +} \begin{Fsynopsis} INTEGER PE_start, logPE_stride, PE_size @@ -17,6 +31,11 @@ \begin{apiarguments} +\newtext{ +\apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on the + default context.} +} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} @@ -37,22 +56,50 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_barrier} is a collective synchronization routine over an - active set. Control returns from \FUNC{shmem\_barrier} after all \acp{PE} in - the active set (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and - \VAR{PE\_size}) have called \FUNC{shmem\_barrier}. - + \FUNC{shmem\_barrier} is a collective synchronization routine over + \newtext {the \ac{PE} team associated with a communication context or} an active set. + Control returns from \FUNC{shmem\_barrier} after all \acp{PE} in + the \newtext{specified \ac{PE} team or} active set \oldtext{(specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and + \VAR{PE\_size})} have called \FUNC{shmem\_barrier}. + \newtext{ + An active set is specified by the triple of values: \VAR{PE\_start}, + \VAR{logPE\_stride}, and \VAR{PE\_size}. 
+ } + + {\color{ForestGreen} + The \FUNC{shmem\_barrier} and \FUNC{shmem\_ctx\_barrier} routines that + accept a context handle \VAR{ctx} have the effect of a call to + \FUNC{shmem\_ctx\_quiet} on the specified context, followed by a call to + \FUNC{shmem\_team\_sync} on the team associated with the specified context. + + The \FUNC{shmem\_barrier} routine that accepts an active set triple + has the effect of a call to \FUNC{shmem\_quiet}, which implicitly + operates on the default context, followed by a call to \FUNC{shmem\_sync} + with the same active set and \VAR{pSync} arguments. + } + As with all \openshmem collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the active set calls an \openshmem collective routine, the behavior is undefined. + + \begin{FeedbackRequest} + Do we need to say something like the above for team-based collectives? + We already say that teams are not portable across PEs, so only the PE + that created the team could invoke it in a collective without undefined + behavior. + + Should we have a harder partition between the description text between + the team- and active set-based API (since the active set and pSync + requirements don't affect the team call)? + \end{FeedbackRequest} The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. The same work array must be passed in \VAR{pSync} to all \acp{PE} in the active set. - \FUNC{shmem\_barrier} ensures that all previously issued stores and remote - memory updates, including \acp{AMO} and \ac{RMA} operations, done by any of the - \acp{PE} in the active set on the default context are complete before returning. 
+ \newtext{The} \FUNC{shmem\_barrier} \newtext{routines} ensure\oldtext{s} that all previously issued stores and remote + memory updates, including \acp{AMO} and \ac{RMA} operations, \newtext{issued} \oldtext{done} by any of the + \acp{PE} in the \newtext{\ac{PE} team or} active set on the default context are complete before returning. The same \VAR{pSync} array may be reused on consecutive calls to \FUNC{shmem\_barrier} if the same active set is used. diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 2c5707929..93ab30e0e 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -5,12 +5,28 @@ \begin{apidefinition} +{\color{ForestGreen} +\begin{C11synopsis} +void @\FuncDecl{shmem\_sync}@(shmem_team_t team); +\end{C11synopsis} +} + +% These separate synopses should be merged when the color highlighting is removed, +% preferably with shmem_team_sync listed first. \begin{Csynopsis} void @\FuncDecl{shmem\_sync}@(int PE_start, int logPE_stride, int PE_size, long *pSync); \end{Csynopsis} +{\color{ForestGreen} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_sync}@(shmem_team_t team); +\end{CsynopsisCol} +} \begin{apiarguments} +\newtext{ +\apiargument{IN}{team}{The \ac{PE} team over which to perform the operation.} +} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between @@ -26,20 +42,36 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_sync} is a collective synchronization routine over an - active set. Control returns from \FUNC{shmem\_sync} after all \acp{PE} in - the active set (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and - \VAR{PE\_size}) have called \FUNC{shmem\_sync}. + \FUNC{shmem\_sync} is a collective synchronization routine over + \newtext {a \ac{PE} team or} an active set.
+ Control returns from \FUNC{shmem\_sync} after all \acp{PE} in + the \newtext{specified \ac{PE} team or} active set \oldtext{(specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and + \VAR{PE\_size})} have called \FUNC{shmem\_sync}. + \newtext{ + An active set is specified by the triple of values: \VAR{PE\_start}, + \VAR{logPE\_stride}, and \VAR{PE\_size}. + } As with all \openshmem collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the active set calls an \openshmem collective routine, the behavior is undefined. + \begin{FeedbackRequest} + Do we need to say something like the above for team-based collectives? + We already say that teams are not portable across PEs, so only the PE + that created the team could invoke it in a collective without undefined + behavior. + + Should we have a harder partition between the description text between + the team- and active set-based API (since the active set and pSync + requirements don't affect the team call)? + \end{FeedbackRequest} + The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same work array must be passed in \VAR{pSync} to all \acp{PE} in the active set. - In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only + In contrast with the \FUNC{shmem\_barrier} routine\newtext{s}, \FUNC{shmem\_sync} only ensures completion and visibility of previously issued memory stores and does not ensure completion of remote memory updates issued via \openshmem routines. 
From 6c06c5aee96564fd1a30d850b43b464ee6032df7 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 28 Jun 2018 10:37:21 -0500 Subject: [PATCH 034/319] Remove erroneous text about team handle in team destroy --- content/shmem_team_destroy.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index b63314af8..8613142e1 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -17,8 +17,7 @@ collective call, in which every member of the team being destroyed needs to participate. This will free all internal memory structures associated with the team and invalidate the team handle. Upon return, the team -handle is set to \LibConstRef{SHMEM\_TEAM\_NULL}, after which it can no longer be -used for team API calls. +handle can no longer be used for team API calls. It is considered erroneous to free \LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE}. Error checking will be done to ensure a valid @@ -35,7 +34,8 @@ \apinotes{ Note that \openshmem team handles have local semantics only. That is, team handles should not be stored in shared variables and used across other -processes. Doing so will result in unpredictable behavior. +processes. If a team handle or its value is used by any \ac{PE} other than +that which created it, the behavior is undefined. 
} \end{apidefinition} From b5321528f667bddb152c71794ed317805bffea46 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 3 Jul 2018 12:15:05 -0500 Subject: [PATCH 035/319] Move and rename teams section and add intro text --- main_spec.tex | 107 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 30 deletions(-) diff --git a/main_spec.tex b/main_spec.tex index 29b67bf6a..28c5b7397 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -116,6 +116,83 @@ \subsubsection{\textbf{SHPDEALLC}}\label{subsec:shpdeallc} \input{content/shpdeallc.tex} + + +\color{Green} +\subsection{Teams Management Routines}\label{subsec:team} + +The \acp{PE} in an \openshmem program can communicate either using +point-to-point routines that specify the \ac{PE} number of the target +\ac{PE} or using collective routines which operate over some predefined +set of \acp{PE}. Teams in \openshmem allow programs to group subsets +of \acp{PE} for collective communications and provide a contiguous renumbering +of the \acp{PE} within that subset that can be used in point-to-point communication. + +An \openshmem team is a set of \acp{PE} defined by calling a specific team +split routine with a parent team argument and other arguments to further +specify how the parent team is to be split into one or more new teams. +A team created by a team split routine can be used as the parent team +for a subsequent call to a team split routine. A team persists and can +be used for multiple collective routine calls until it is destroyed by +\FUNC{shmem\_team\_destroy}. + +Every team must have at least one member. Any attempt to create a team over an empty set of \acp{PE} will result in no new team being created. + +A ``team handle'' is an opaque object with type \CTYPE{shmem\_team\_t} that is used +to reference a defined team. Team handles are created by one of the team split +routines and destroyed by the team destroy routine. Team handles have local +semantics only.
That is, team handles should not be stored in shared variables +and used across other \acp{PE}. Doing so will result in undefined behavior. + +By default, \openshmem creates predefined teams that will be available +for use once the routine \FUNC{shmem\_init} has been called. See section +\ref{subsec:library_handles} for a description of all predefined team handles +provided by \openshmem. Predefined \CTYPE{shmem\_team\_t} handles can be used as +the parent team when creating new \openshmem teams. + +Every \ac{PE} is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} team, +and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the +value of its global \ac{PE} number as returned by \FUNC{shmem\_my\_pe}. + +A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, is used to +indicate that a returned team handle is not valid. This value can be tested +against to check for successful split operations and can be assigned to user +declared team handles as a sentinel value. + +Teams may be created with options that change team behavior and may allow for +more optimized performance. These options are described in section +\ref{subsec:library_constants} and in the various descriptions of the +team split routines below. In particular, teams may be created with an option +to disable support for collective communications, which allows implementations +to reduce team creation overheads for those teams. In that case, the team is +just a local renumbering of some set of \acp{PE} that can be used for +point to point communications or as parent teams in subsequent split operations. 
+ +\subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} +\input{content/shmem_team_my_pe.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_N\_PES}}\label{subsec:shmem_team_n_pes} +\input{content/shmem_team_n_pes.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_GET\_OPTIONS}}\label{subsec:shmem_team_get_options} +\input{content/shmem_team_get_options.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_TRANSLATE}}\label{subsec:shmem_team_translate} +\input{content/shmem_team_translate.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_STRIDED}}\label{subsec:shmem_team_split_strided} +\input{content/shmem_team_split_strided.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_3D}}\label{subsec:shmem_team_split_3d} +\input{content/shmem_team_split_3d.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} +\input{content/shmem_team_destroy.tex} +\color{Black} + + + + \subsection{Communication Management Routines} \label{sec:ctx} All \openshmem RMA, AMO, and memory ordering routines are @@ -299,36 +376,6 @@ \subsubsection{\textbf{SHMEM\_ALLTOALLS}}\label{subsec:shmem_alltoalls} - -\color{Green} -\subsection{Teams or PE Subsets}\label{subsec:team} - -\subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} -\input{content/shmem_team_my_pe.tex} - -\subsubsection{\textbf{SHMEM\_TEAM\_N\_PES}}\label{subsec:shmem_team_n_pes} -\input{content/shmem_team_n_pes.tex} - -\subsubsection{\textbf{SHMEM\_TEAM\_GET\_OPTIONS}}\label{subsec:shmem_team_get_options} -\input{content/shmem_team_get_options.tex} - -\subsubsection{\textbf{SHMEM\_TEAM\_TRANSLATE}}\label{subsec:shmem_team_translate} -\input{content/shmem_team_translate.tex} - -\subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_STRIDED}}\label{subsec:shmem_team_split_strided} -\input{content/shmem_team_split_strided.tex} - -\subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_3D}}\label{subsec:shmem_team_split_3d} -\input{content/shmem_team_split_3d.tex} - 
-\subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} -\input{content/shmem_team_destroy.tex} -\color{Black} - - - - - \subsection{Point-To-Point Synchronization Routines}\label{subsec:p2p_intro} \input{content/p2p_sync_intro.tex} From ba49598f88180c2e412d1603d33deb55f51511b5 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 3 Jul 2018 12:16:06 -0500 Subject: [PATCH 036/319] Update existing teams section to remove duplicate text --- content/shmem_team_destroy.tex | 7 ++----- content/shmem_team_my_pe.tex | 8 +------- content/shmem_team_n_pes.tex | 11 ++--------- content/shmem_team_split_3d.tex | 4 +--- content/shmem_team_split_strided.tex | 4 +--- content/shmem_team_translate.tex | 8 +------- 6 files changed, 8 insertions(+), 34 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 8613142e1..841344590 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -20,7 +20,7 @@ handle can no longer be used for team API calls. It is considered erroneous to free \LibHandleRef{SHMEM\_TEAM\_WORLD} or -\LibHandleRef{SHMEM\_TEAM\_NODE}. Error checking will be done to ensure a valid +any other predefined team. Error checking will be done to ensure a valid team handle is provided. Errors will result in a return value less than \CONST{0}. } @@ -32,10 +32,7 @@ \end{FeedbackRequest} \apinotes{ -Note that \openshmem team handles have local semantics only. That is, team -handles should not be stored in shared variables and used across other -processes. If a team handle or its value is used by any \ac{PE} other than -that which created it, the behavior is undefined. +None. 
} \end{apidefinition} diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index 9b79355ba..1ffa4d1cc 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -31,13 +31,7 @@ \end{FeedbackRequest} \apinotes{ -By default, \openshmem creates two predefined teams that will be available -for use once the routine \FUNC{shmem\_init} has been called. These teams can be -referenced in the application by the handles \LibHandleRef{SHMEM\_TEAM\_WORLD} and -\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE} is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} -team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains the set of only those -\acp{PE} that reside on the same node as the current \ac{PE}. +None. } \end{apidefinition} diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index 1b1bcf2a2..6b30a8a0c 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -19,8 +19,7 @@ \LibHandleRef{SHMEM\_TEAM\_WORLD}, this will return the same value as \FUNC{shmem\_n\_pes}. -Every team must have a least one member. All \acp{PE} in the team -will get back the same value for the team size. +All \acp{PE} in the team will get back the same value for the team size. Error checking will be done to ensure a valid team handle is provided. Errors will result in a return value less than \CONST{0}. @@ -34,13 +33,7 @@ \end{FeedbackRequest} \apinotes{ -By default, \openshmem creates two predefined teams that will be available -for use once the routine \FUNC{shmem\_init} has been called. These teams can be -referenced in the application by the constants \LibHandleRef{SHMEM\_TEAM\_WORLD} and -\LibHandleRef{SHMEM\_TEAM\_NODE}. Every \ac{PE} is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} -team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. 
The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains the set of only those -\acp{PE} that reside on the same node as the current \ac{PE}. +None. } \end{apidefinition} diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_3d.tex index 0fd86b29d..59551e6c7 100644 --- a/content/shmem_team_split_3d.tex +++ b/content/shmem_team_split_3d.tex @@ -96,9 +96,7 @@ } \apinotes{ -Note that SHMEM team handles have local semantics only. That is, team -handles should not be stored in shared variables and used across other -processes. Doing so will result in unpredictable behavior. +See the description of team handles and predefined teams at the top of section \ref{subsec:team} for more information about semantics and usage. } \begin{apiexamples} diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index c785e8156..1286555c4 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -86,9 +86,7 @@ } \apinotes{ -Note that SHMEM team handles have local semantics only. That is, team -handles should not be stored in shared variables and used across other -processes. Doing so will result in unpredictable behavior. +See the description of team handles and predefined teams at the top of section \ref{subsec:team} for more information about semantics and usage. } \begin{apiexamples} diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex index 1e022040f..d4e5f2105 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate.tex @@ -41,13 +41,7 @@ \end{FeedbackRequest} \apinotes{ -By default, \openshmem creates two predefined teams that will be available -for use once the routine \FUNC{shmem\_init} has been called. These teams can be -referenced in the application by the constants \LibHandleRef{SHMEM\_TEAM\_WORLD} and -\LibHandleRef{SHMEM\_TEAM\_NODE}. 
Every \ac{PE} is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} -team, and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the value of its -global \ac{PE} number. The \LibHandleRef{SHMEM\_TEAM\_NODE} team contains the set of only those -\acp{PE} that reside on the same node as the current \ac{PE}. +None. } \end{apidefinition} From c0f95e70d7ad8985f87a81090ebff120e5e9d0a9 Mon Sep 17 00:00:00 2001 From: Anshuman Goswami Date: Mon, 9 Jul 2018 00:34:41 -0700 Subject: [PATCH 037/319] minor updates based on shmem_team_broadcast --- content/shmem_team_collect.tex | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/content/shmem_team_collect.tex b/content/shmem_team_collect.tex index 7cf03cf20..407d19c2d 100644 --- a/content/shmem_team_collect.tex +++ b/content/shmem_team_collect.tex @@ -1,5 +1,5 @@ \apisummary{ - Concatenates blocks of data from multiple \acp{PE} int a team to an array in every + Concatenates blocks of data from multiple \acp{PE} in a team to an array in every \ac{PE} in the team. } @@ -8,6 +8,8 @@ \begin{Csynopsis} void @\FuncDecl{shmem\_team\_collect32}@(shmem_team_t team, void *dest, const void *source, size_t nelems); void @\FuncDecl{shmem\_team\_collect64}@(shmem_team_t team, void *dest, const void *source, size_t nelems); +void @\FuncDecl{shmem\_team\_fcollect32}@(shmem_team_t team, void *dest, const void *source, size_t nelems); +void @\FuncDecl{shmem\_team\_fcollect64}@(shmem_team_t team, void *dest, const void *source, size_t nelems); \end{Csynopsis} \begin{apiarguments} @@ -29,12 +31,12 @@ over an existing team. These routines concatenate \VAR{nelems} \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the \dest{} array, over all \acp{PE} in the specified \VAR{team} in processor number order. - The resultant \dest{} array contains the contribution from \ac{PE} with \VAR{team} number 0 - first, then the contribution from \ac{PE} with \VAR{team} number 1, and so on. 
+ The resultant \dest{} array contains the contribution from the first \ac{PE} in the \VAR{team}, + then the contribution from the second \ac{PE} in the \VAR{team}, and so on. The collected result is written to the \dest{} array for all \acp{PE} in the team. The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all - participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to + \acp{PE} in the team, while the \FUNC{collect} routines allow \VAR{nelems} to vary from \ac{PE} to \ac{PE}. As with all \openshmem team collective routines, each of these routines assumes that @@ -46,11 +48,6 @@ such a team is passed to this or any other team collective routine, the behavior is undefined. - As with all \openshmem routines where the operation occurs for a given team - - either when the team is an argument to the routine, or when the team is an attribute - of the context argument to a routine - the \ac{PE} numbers are relative to the team, - and must be in the range of 0 to N-1, where N is the result of \FUNC{shmem\_team\_n\_pes(team)}. - The same \dest{} and \source{} data objects must be passed by all \acp{PE} in the team. From 41df6e6f69ca23a9232e6e599195ffd9eb3a695b Mon Sep 17 00:00:00 2001 From: Anshuman Goswami Date: Mon, 9 Jul 2018 14:27:43 -0700 Subject: [PATCH 038/319] fixes based on review comments --- content/shmem_team_collect.tex | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/content/shmem_team_collect.tex b/content/shmem_team_collect.tex index 407d19c2d..f45250047 100644 --- a/content/shmem_team_collect.tex +++ b/content/shmem_team_collect.tex @@ -35,14 +35,10 @@ then the contribution from the second \ac{PE} in the \VAR{team}, and so on. The collected result is written to the \dest{} array for all \acp{PE} in the team. 
- The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all - \acp{PE} in the team, while the \FUNC{collect} routines allow \VAR{nelems} to + The \FUNC{fcollect} routines require that all \acp{PE} in the team provide the same + value for \VAR{nelems}, while the \FUNC{collect} routines allow \VAR{nelems} to vary from \ac{PE} to \ac{PE}. - As with all \openshmem team collective routines, each of these routines assumes that - only \acp{PE} in the given team call the routine. If a \ac{PE} not in the - team calls an \openshmem team collective routine, the behavior is undefined. - If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, it will not have the required support structures to complete this routine. If such a team is passed to this or any other team collective routine, the behavior @@ -58,9 +54,10 @@ \item The \source{} array may be safely reused. \end{itemize} - Error checking will be done to ensure a valid team handle is provided. - All errors are considered fatal and will result in the job aborting - with an informative error message. + %Issue 25 + %Error checking will be done to ensure a valid team handle is provided. + %All errors are considered fatal and will result in the job aborting + %with an informative error message. } \apidesctable{ @@ -83,10 +80,6 @@ All \openshmem team collective routines use symmetric data structures associated with the team to synchronize and share data. By default, new teams that result from split operations will have these structures. - - Multiple calls to the same collective routine for the same team by different threads - must avoid any simultaneous updates to these structures. In general, this will mean - that threads will need to serialize access to teams. } \begin{apiexamples} From c79daede9ca207e78804283f8f59a4f30a2cd455 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Jul 2018 16:38:21 -0500 Subject: [PATCH 039/319] Cleaned up wording. 
Softened error checking requirement. --- content/shmem_team_broadcast.tex | 36 ++++++++++++-------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/content/shmem_team_broadcast.tex b/content/shmem_team_broadcast.tex index 364de4198..3417aee6b 100644 --- a/content/shmem_team_broadcast.tex +++ b/content/shmem_team_broadcast.tex @@ -1,6 +1,5 @@ \apisummary{ - Broadcasts a block of data from one \ac{PE} to one or more destination - \acp{PE}. + Broadcasts a block of data from one \ac{PE} in a team to all other \acp{PE} in the team. } \begin{apidefinition} @@ -20,16 +19,14 @@ that is permissible for the \dest{} argument.} \apiargument{IN}{nelems}{The number of elements in \source{}. For \FUNC{shmem\_team\_broadcast32}, this is the number of - 32-bit halfwords. nelems must be of type \CTYPE{size\_t} in \Cstd.} + 32-bit halfwords. \VAR{nelems} must be of type \CTYPE{size\_t}.} \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the team, from which the data is copied. Must be greater than or equal to - 0 and less than N-1, where N is the result of calling \FUNC{shmem\_team\_n\_pes(team)}. - \VAR{PE\_root} must be of type \CTYPE{int}.} + the team, from which the data is copied. \VAR{PE\_root} must be of type \CTYPE{int}.} \end{apiarguments} \apidescription{ - \openshmem broadcast routines are collective routines over an existing team. + \openshmem team broadcast routines are collective routines over an existing team. They copy data object \source{} on the processor specified by \VAR{PE\_root} and store the values at \dest{} on the other \acp{PE} that are members of the team. The data is not copied to the \dest{} area on the root \ac{PE}. @@ -39,26 +36,26 @@ such a team is passed to this or any other team collective routine, the behavior is undefined. 
- As with all \openshmem routines where the operation occurs for a given team - - either when the team is an argument to the routine, or when the team is an attribute - of the context argument to a routine - the \ac{PE} numbers are relative to the team, - and must be in the range of 0 to N-1, where N is the result of \FUNC{shmem\_team\_n\_pes(team)}. + As with all \openshmem routines where the operation occurs over a given team, \ac{PE} + numbering is relative to the team. The specified root \ac{PE} must be a valid \ac{PE} + number for the team, between \CONST{0} and \VAR{N-1}, where \VAR{N} is + the size of the team. The values of the argument \VAR{PE\_root} must be the same value on all \acp{PE} in the team. The same \dest{} and \source{} data objects must be passed by all \acp{PE} in the team. - Upon return from a broadcast routine, the following are true for the local - \ac{PE}: + Upon return from a broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} \item If the current \ac{PE} is not the root \ac{PE}, the \dest{} data object is updated. \item The \source{} data object may be safely reused. \end{itemize} - Error checking will be done to ensure a valid team handle is provided. - All errors are considered fatal and will result in the job aborting with - an informative error message. + Error checking will be done to detect a value of \LibConstRef{SHMEM\_TEAM\_NULL} passed + for the team argument. In that case, the program will abort with an informative + error message. If an invalid team handle is passed to the routine, + the behavior is undefined. } \apidesctable{ @@ -78,13 +75,6 @@ } \apinotes{ - All \openshmem team collective routines use symmetric data structures associated - with the team to synchronize and share data. By default, new teams that result from - split operations will have these structures. 
- - Multiple calls to the same collective routine for the same team by different threads - must avoid any simultaneous updates to these structures. In general, this will mean - that threads will need to serialize access to teams. } \end{apidefinition} From 559ad92fdc7a30787de15e8649eb2963704db6b2 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 10 Jul 2018 11:30:16 -0500 Subject: [PATCH 040/319] Update description of 3d split to 2d split --- content/shmem_team_split_3d.tex | 127 ++++++++++++++++---------------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_3d.tex index 0fd86b29d..1efa4df22 100644 --- a/content/shmem_team_split_3d.tex +++ b/content/shmem_team_split_3d.tex @@ -1,86 +1,91 @@ \apisummary{ -Create up to three new teams -by splitting an existing parent team into up to three subsets based on a -2D or 3D Cartesian space defined by the triplet (xrange, yrange, and zrange) -describing the size of the Cartesian space in X, Y, and Z dimensions.} +Create two new teams by splitting an existing parent team into two subsets based on a +2D Cartesian space defined by xrange argument and the yrange derived from xrange +and the parent team size. These ranges describe the Cartesian space in X and Y +dimensions.} \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_team\_split\_3d}@(shmem_team_t parent_team, long options, -int xrange, int yrange, int zrange, shmem_team_t *xaxis_team, shmem_team_t *yaxis_team, -shmem_team_t *zaxis_team); +void @\FuncDecl{shmem\_team\_split\_2d}@(shmem_team_t parent_team, int xrange, +long xaxis_options, shmem_team_t *xaxis_team, long yaxis_options, shmem_team_t *yaxis_team); \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{parent\_team}{A valid \openshmem team. 
The predefined teams -\LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE} may be -used, or any team created by the user.} - -\apiargument{IN}{options}{The set of options requested for the new teams. -Multiple options may be requested by combining them with a bitwise OR operation; -otherwise, \CONST{0} can be given if no options are requested.} +\apiargument{IN}{parent\_team}{A valid \openshmem team. Any predefined teams, such as +\LibHandleRef{SHMEM\_TEAM\_WORLD}, may be used, or any team created by the user.} \apiargument{IN}{xrange}{A non-negative integer representing the number of elements in the first dimension.} -\apiargument{IN}{yrange}{A non-negative integer representing the number of -elements in the second dimension.} - -\apiargument{IN}{zrange}{A non-negative integer representing the number of -elements in the third dimension. \CONST{0} can be given if the defined space -has no third dimension.} +\apiargument{IN}{xaxis\_options}{The set of options requested for the new X-axis team. +Multiple options may be requested by combining them with a bitwise OR operation; +otherwise, \CONST{0} can be given if no options are requested.} \apiargument{OUT}{xaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} -subset consisting of all the \acp{PE} that are in the same row in the X-axis.} +subset consisting of all the \acp{PE} that have the same coordinate along the X-axis +as the calling \ac{PE}.} -\apiargument{OUT}{yaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} -subset consisting of all the \acp{PE} that are in the same column in the Y-axis.} +\apiargument{IN}{yaxis\_options}{The set of options requested for the new Y-axis team. 
+These options do not have to be the same as the options for the new X-axis team.} -\apiargument{OUT}{zaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} -subset consisting of all the \acp{PE} that are in the same position in in the -Z-axis.} +\apiargument{OUT}{yaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} +subset consisting of all the \acp{PE} that have the same coordinate along the Y-axis +as the calling \ac{PE}.} \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_split\_3d} routine is a collective routine. It creates up to three new teams -by splitting an existing parent team into up to three subsets based on a -1D, 2D or 3D Cartesian space defined by the triplet (\VAR{xrange}, \VAR{yrange}, -and \VAR{zrange}) describing the size of the Cartesian space in X, Y, and Z dimensions. - -A 2D Cartesian space may be defined by passing \CONST{0} for the \VAR{zrange} -parameter. In this case, the zaxis team argument may be passed a null -pointer. If it is not passed as a null pointer, the zaxis team will be returned -as \LibConstRef{SHMEM\_TEAM\_NULL}. - -Each subset contains all \acp{PE} that are in the same -dimension, along the X-axis, Y-axis and Z-axis. Within each subset, -the \acp{PE} are numbered based on the position of the \ac{PE} with respect to its -dimension in three-dimensional Cartesian space. - -Any valid \ac{PE} team can be used as the parent team. This routine must be -called by all \acp{PE} in the parent team. The value of the triplets must be -non-negative, and the size of the parent team should be greater than or -equal to the number of discrete (x,y,z) points in the Cartesian space. -None of the parameters need to reside in symmetric memory. - -In the event that the Cartesian space contains fewer points than exist \acp{PE} -in the parent team, the first N \acp{PE} in the parent team will be used -to form the new teams, where N is equal to the number of points in the -Cartesian space. 
- -Error checking will be done to ensure a valid parent team handle is provided. - -\begin{FeedbackRequest} -All errors are considered fatal and will result in the job aborting with -an informative error message. -\end{FeedbackRequest} +The \FUNC{shmem\_team\_split\_2d} routine is a collective routine. It creates two new teams +by splitting an existing parent team into up to two subsets based on a +2D Cartesian space. The user provides the size of the X dimension, which is then used to +derive the size of the Y dimension based on the size of the parent team. The size of +the Y dimension will be equal to $ceiling ( N \div xrange )$, where \VAR{N} is the size +of the parent team. In other words, $xrange \times yrange \geq N$, so that every \ac{PE} +in the parent team has a unique \VAR{(x,y)} location the 2D Cartesian space. + +After the split operation, each of the new teams will contain all \acp{PE} that have the same +coordinate along the X-axis and Y-axis, respectively, as the calling \ac{PE}. The +\acp{PE} are numbered in the new teams based on the position of the \ac{PE} along +the given axis. + +Any valid \openshmem team can be used as the parent team. This routine must be +called by all \acp{PE} in the parent team. The value of \VAR{xrange} must be +non-negative and all \acp{PE} in the parent team must pass the same value for +\VAR{xrange}. None of the parameters need to reside in symmetric memory. + +Since the split may result in a 2D space with more points than there are members of +the parent team, there may be a final, incomplete row of the 2D mapping of the parent +team. This means that the resulting X-axis teams may vary in size by up to 1 \ac{PE}, +and that there may be one resulting Y-axis team of smaller size than all of the other +Y-axis teams. + +The following grid shows the twelve teams that would result from splitting a parent team +of size 10 with \VAR{xrange} of 3. The numbers in the grid cells are the \ac{PE} numbers +in the parent team. 
The rows are the Y-axis teams. The columns are the X-axis teams. + +\begin{center} +\begin{tabular}{|l|l|l|l|} + \hline + & x=0 & x=1 & x=2 \\ \hline + y=0 & 0 & 1 & 2 \\ \hline + y=1 & 3 & 4 & 5 \\ \hline + y=2 & 6 & 7 & 8 \\ \hline + y=3 & 9 \\ + \cline{0-1} +\end{tabular} +\end{center} + +Error checking will be done to detect if the value \LibConstRef{SHMEM\_TEAM\_NULL} +is passed as the parent team handle. In that case, the program will abort with +an informative error message. If an invalid team handle is passed to the +routine, the behavior is undefined. The following options can be supplied during team split to restrict team functions and enable performance optimizations. When using a given team, the application must comply with the requirements of all options set on that team; otherwise, the behavior is undefined. -No options are enabled on \LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE}. +No options are enabled on \LibHandleRef{SHMEM\_TEAM\_WORLD} or on other +predefined \openshmem teams. \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ The new team will not be created with the necessary support @@ -96,9 +101,7 @@ } \apinotes{ -Note that SHMEM team handles have local semantics only. That is, team -handles should not be stored in shared variables and used across other -processes. Doing so will result in unpredictable behavior. +See the description of team handles and predefined teams at the top of section \ref{subsec:team} for more information about semantics and usage. 
} \begin{apiexamples} From 9ad096eac699931117d1ea8ded1391cb4b91b937 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 10 Jul 2018 11:33:06 -0500 Subject: [PATCH 041/319] Changed filename and subsection name --- content/{shmem_team_split_3d.tex => shmem_team_split_2d.tex} | 0 main_spec.tex | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename content/{shmem_team_split_3d.tex => shmem_team_split_2d.tex} (100%) diff --git a/content/shmem_team_split_3d.tex b/content/shmem_team_split_2d.tex similarity index 100% rename from content/shmem_team_split_3d.tex rename to content/shmem_team_split_2d.tex diff --git a/main_spec.tex b/main_spec.tex index 29b67bf6a..4653be4ce 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -318,8 +318,8 @@ \subsubsection{\textbf{SHMEM\_TEAM\_TRANSLATE}}\label{subsec:shmem_team_translat \subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_STRIDED}}\label{subsec:shmem_team_split_strided} \input{content/shmem_team_split_strided.tex} -\subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_3D}}\label{subsec:shmem_team_split_3d} -\input{content/shmem_team_split_3d.tex} +\subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_2D}}\label{subsec:shmem_team_split_2d} +\input{content/shmem_team_split_2d.tex} \subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} \input{content/shmem_team_destroy.tex} From 306185132f681558f8f39b4963e6ac6b97c476a5 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 11 Jul 2018 13:47:18 -0500 Subject: [PATCH 042/319] Clean up and clarify options requirements for split 2d --- content/shmem_team_split_2d.tex | 88 ++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index 1efa4df22..8a8e9349f 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -35,55 +35,38 @@ \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_split\_2d} routine is a collective routine. 
It creates two new teams -by splitting an existing parent team into up to two subsets based on a -2D Cartesian space. The user provides the size of the X dimension, which is then used to -derive the size of the Y dimension based on the size of the parent team. The size of -the Y dimension will be equal to $ceiling ( N \div xrange )$, where \VAR{N} is the size -of the parent team. In other words, $xrange \times yrange \geq N$, so that every \ac{PE} -in the parent team has a unique \VAR{(x,y)} location the 2D Cartesian space. - -After the split operation, each of the new teams will contain all \acp{PE} that have the same -coordinate along the X-axis and Y-axis, respectively, as the calling \ac{PE}. The -\acp{PE} are numbered in the new teams based on the position of the \ac{PE} along -the given axis. +The \FUNC{shmem\_team\_split\_2d} routine is a collective routine. It creates two +new teams by splitting an existing parent team into up to two subsets based on a +2D Cartesian space. The user provides the size of the X dimension, which is then +used to derive the size of the Y dimension based on the size of the parent team. +The size of the Y dimension will be equal to $\lceil N \div \mathit{xrange} \rceil$, where +\VAR{N} is the size of the parent team. In other words, +$\mathit{xrange} \times \mathit{yrange} \geq N$, so that every \ac{PE} in the parent team has a +unique \VAR{(x,y)} location in the 2D Cartesian space. + +After the split operation, each of the new teams will contain all \acp{PE} that +have the same coordinate along the X-axis and Y-axis, respectively, as the calling +\ac{PE}. The \acp{PE} are numbered in the new teams based on the position of the +\ac{PE} along the given axis. Any valid \openshmem team can be used as the parent team. This routine must be called by all \acp{PE} in the parent team. The value of \VAR{xrange} must be non-negative and all \acp{PE} in the parent team must pass the same value for \VAR{xrange}. None of the parameters need to reside in symmetric memory. 
-Since the split may result in a 2D space with more points than there are members of -the parent team, there may be a final, incomplete row of the 2D mapping of the parent -team. This means that the resulting X-axis teams may vary in size by up to 1 \ac{PE}, -and that there may be one resulting Y-axis team of smaller size than all of the other -Y-axis teams. - -The following grid shows the twelve teams that would result from splitting a parent team -of size 10 with \VAR{xrange} of 3. The numbers in the grid cells are the \ac{PE} numbers -in the parent team. The rows are the Y-axis teams. The columns are the X-axis teams. - -\begin{center} -\begin{tabular}{|l|l|l|l|} - \hline - & x=0 & x=1 & x=2 \\ \hline - y=0 & 0 & 1 & 2 \\ \hline - y=1 & 3 & 4 & 5 \\ \hline - y=2 & 6 & 7 & 8 \\ \hline - y=3 & 9 \\ - \cline{0-1} -\end{tabular} -\end{center} - Error checking will be done to detect if the value \LibConstRef{SHMEM\_TEAM\_NULL} is passed as the parent team handle. In that case, the program will abort with an informative error message. If an invalid team handle is passed to the routine, the behavior is undefined. -The following options can be supplied during team split to restrict -team functions and enable performance optimizations. When using a given -team, the application must comply with the requirements of all options -set on that team; otherwise, the behavior is undefined. +The options in the table below can be supplied during team split to restrict +team functions and enable performance optimizations. All \acp{PE} that will +be in the same resultant new team must specify the same options. The \acp{PE} +in the parent team \emph{do not} have to all provide the same options for new +teams. + +When using a given team, the application must comply with the requirements +of all options set on that team; otherwise, the behavior is undefined. No options are enabled on \LibHandleRef{SHMEM\_TEAM\_WORLD} or on other predefined \openshmem teams. 
@@ -101,7 +84,34 @@ } \apinotes{ -See the description of team handles and predefined teams at the top of section \ref{subsec:team} for more information about semantics and usage. +Since the split may result in a 2D space with more points than there are members of +the parent team, there may be a final, incomplete row of the 2D mapping of the parent +team. This means that the resultant X-axis teams may vary in size by up to 1 \ac{PE}, +and that there may be one resultant Y-axis team of smaller size than all of the other +Y-axis teams. + +The following grid shows the 12 teams that would result from splitting a parent team +of size 10 with \VAR{xrange} of 3. The numbers in the grid cells are the \ac{PE} numbers +in the parent team. The rows are the Y-axis teams. The columns are the X-axis teams. + +\begin{center} +\begin{tabular}{|l|l|l|l|} + \hline + & x=0 & x=1 & x=2 \\ \hline + y=0 & 0 & 1 & 2 \\ \hline + y=1 & 3 & 4 & 5 \\ \hline + y=2 & 6 & 7 & 8 \\ \hline + y=3 & 9 \\ + \cline{1-2} +\end{tabular} +\end{center} + +It would be legal, for example, if \acp{PE} 0, 3, 6, 9 specified a different value +for \VAR{xaxis\_options} than all of the other \acp{PE}, as long as options match +for all \acp{PE} in each of the new teams. + +See the description of team handles and predefined teams at the top of section +\ref{subsec:team} for more information about team handle semantics and usage. } \begin{apiexamples} From a8cacd41d5cc3f0f0860f5221d9503fd12d66b3b Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 11 Jul 2018 13:53:58 -0500 Subject: [PATCH 043/319] Add preliminary error check requirement to team collect --- content/shmem_team_collect.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/shmem_team_collect.tex b/content/shmem_team_collect.tex index f45250047..7d1ab0eb0 100644 --- a/content/shmem_team_collect.tex +++ b/content/shmem_team_collect.tex @@ -54,10 +54,10 @@ \item The \source{} array may be safely reused. 
\end{itemize} - %Issue 25 - %Error checking will be done to ensure a valid team handle is provided. - %All errors are considered fatal and will result in the job aborting - %with an informative error message. + Error checking will be done to detect a value of \LibConstRef{SHMEM\_TEAM\_NULL} passed + for the team argument. In that case, the program will abort with an informative + error message. If an invalid team handle is passed to the routine, + the behavior is undefined. } \apidesctable{ From 15eca29dd31d49a9f9088a9da7d338f47ded8e43 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 11 Jul 2018 14:04:28 -0500 Subject: [PATCH 044/319] Change the term numbering to indexing for teams description --- main_spec.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main_spec.tex b/main_spec.tex index a8b908120..fbf7e40db 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -125,7 +125,7 @@ \subsection{Teams Management Routines}\label{subsec:team} point-to-point routines that specify the \ac{PE} number of the target \ac{PE} or using collective routines which operate over some predefined set of \acp{PE}. Teams in \openshmem allow programs to group subsets -of \acp{PE} for collective communications and provide a contiguous renumbering +of \acp{PE} for collective communications and provide a contiguous reindexing of the \acp{PE} within that subset that can be used in point-to-point communication. An \openshmem team is a set of \acp{PE} defined by calling a specific team @@ -165,7 +165,7 @@ \subsection{Teams Management Routines}\label{subsec:team} team split routines below. In particular, teams may be created with an option to disable support for collective communications, which allows implementations to reduce team creation overheads for those teams. 
In that case, the team is -just a local renumbering of some set of \acp{PE} that can be used for +just a local reindexing of some set of \acp{PE} that can be used for point to point communications or as parent teams in subsequent split operations. \subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} From 0ed0126b840a001ecca75b3e6624a0af31604548 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 25 Jul 2018 17:41:31 -0400 Subject: [PATCH 045/319] Revise team-context interactions per gmegan/specification#28 --- content/shmem_ctx_create.tex | 2 +- content/shmem_ctx_destroy.tex | 10 +++-- content/shmem_ctx_get_team.tex | 61 +++++++++++---------------- content/shmem_ctx_set_team.tex | 52 ----------------------- content/shmem_team_create_ctx.tex | 55 +++++++++++++++++++++++++ content/shmem_team_destroy_ctx.tex | 37 +++++++++++++++++ main_spec.tex | 66 +++++++++++++++++------------- 7 files changed, 161 insertions(+), 122 deletions(-) delete mode 100644 content/shmem_ctx_set_team.tex create mode 100644 content/shmem_team_create_ctx.tex create mode 100644 content/shmem_team_destroy_ctx.tex diff --git a/content/shmem_ctx_create.tex b/content/shmem_ctx_create.tex index c83667888..b5e7e2a0b 100644 --- a/content/shmem_ctx_create.tex +++ b/content/shmem_ctx_create.tex @@ -1,5 +1,5 @@ \apisummary{ - Create a communication context. + Create a communication context \newtext{locally}. } \begin{apidefinition} diff --git a/content/shmem_ctx_destroy.tex b/content/shmem_ctx_destroy.tex index 3a176712c..ceb01194a 100644 --- a/content/shmem_ctx_destroy.tex +++ b/content/shmem_ctx_destroy.tex @@ -1,5 +1,5 @@ \apisummary{ - Destroy a communication context. + Destroy a \newtext{locally created} communication context. } \begin{apidefinition} @@ -19,9 +19,9 @@ destroyed context is used by multiple threads. This function performs an implicit quiet operation on the given context before it is freed. 
- \newtext{ - When a context is destroyed, the team associated with this context - is not affected. + \newtext{% + It is invalid to pass \CONST{SHMEM\_CTX\_DEFAULT} or a context handle + returned by a call to \FUNC{shmem\_team\_create\_ctx} to this routine. } } @@ -30,7 +30,9 @@ } \apinotes{ + \oldtext{% It is invalid to pass \CONST{SHMEM\_CTX\_DEFAULT} to this routine. + } Destroying a context makes it impossible for the user to complete communication operations that are pending on that context. This includes diff --git a/content/shmem_ctx_get_team.tex b/content/shmem_ctx_get_team.tex index 0a8456d6d..2bc8b9b10 100644 --- a/content/shmem_ctx_get_team.tex +++ b/content/shmem_ctx_get_team.tex @@ -4,49 +4,38 @@ \begin{apidefinition} - \begin{Csynopsis} +\begin{Csynopsis} int @\FuncDecl{shmem\_ctx\_get\_team}@(shmem_ctx_t ctx, shmem_team_t *team); - \end{Csynopsis} +\end{Csynopsis} - \begin{apiarguments} - - \apiargument{IN}{ctx}{ - A handle to a communication context. - } - - \apiargument{OUT}{team}{ - A pointer to a handle to the associated \ac{PE} team. - } - - \end{apiarguments} - - \apidescription{ - The \FUNC{shmem\_ctx\_get\_team} routine returns a handle to the \ac{PE} - team associated with the specified communication context \VAR{ctx}. - The team handle is returned through the pointer argument \VAR{team}. +\begin{apiarguments} + \apiargument{IN}{ctx}{ + A handle to a communication context. + } + \apiargument{OUT}{team}{ + A pointer to a handle to the associated \ac{PE} team. + } +\end{apiarguments} - If \VAR{ctx} is the default context, the returned team is guaranteed - to be \CONST{SHMEM\_TEAM\_WORLD}. +\apidescription{ + The \FUNC{shmem\_ctx\_get\_team} routine returns a handle to the + team associated with the specified communication context \VAR{ctx}. + The team handle is returned through the pointer argument \VAR{team}. - If \VAR{ctx} is an invalid context, the argument \VAR{team} is not - modified and a value of \CONST{-1} is returned. 
+ If \VAR{ctx} is the default context or one created by a call to + \FUNC{shmem\_ctx\_create}, the returned team is the default team. - If \VAR{team} is a null pointer, a value of \CONST{-1} is returned. - } + If \VAR{ctx} is an invalid context, the behavior is undefined. - \apireturnvalues{ - Zero on success; otherwise, \CONST{-1}. + If \VAR{team} is a null pointer, a value of \CONST{-1} is returned. +} - \begin{FeedbackRequest} - Should this routine return nonzero, -1, or negative values - (e.g., to allow for implementation-defined error codes) on error? - Will slowing down the critical path of this routine by adding - input checking adversely affect its use? - \end{FeedbackRequest} - } +\apireturnvalues{ + Zero on success; otherwise, nonzero. +} - \apinotes{ - None. - } +\apinotes{ + None. +} \end{apidefinition} diff --git a/content/shmem_ctx_set_team.tex b/content/shmem_ctx_set_team.tex deleted file mode 100644 index c73804221..000000000 --- a/content/shmem_ctx_set_team.tex +++ /dev/null @@ -1,52 +0,0 @@ -\apisummary{ - Update the team associated with the communication context. -} - -\begin{apidefinition} - - \begin{Csynopsis} -int @\FuncDecl{shmem\_ctx\_set\_team}@(shmem_ctx_t ctx, shmem_team_t team); - \end{Csynopsis} - - \begin{apiarguments} - - \apiargument{IN}{ctx}{ - A handle to a communication context. - } - - \apiargument{IN}{team}{ - A handle to the specified \ac{PE} team. - } - - \end{apiarguments} - - \apidescription{ - The \FUNC{shmem\_ctx\_set\_team} routine associates the \ac{PE} team - identified by the handle \VAR{team} with the communication context - specified by the handle \VAR{ctx}. - All subsequent \openshmem operations performed on the specified context - will operate with respect to the updated \ac{PE} team. - - If \VAR{ctx} is a handle to the default context or - \VAR{team} is equal to the constant \CONST{SHMEM\_TEAM\_NULL}, then - the specified context is not updated and a value of \CONST{-1} is returned. 
- - If \VAR{ctx} is an invalid context, a value of \CONST{-1} is returned. - } - - \apireturnvalues{ - Zero on success; otherwise, \CONST{-1}. - - \begin{FeedbackRequest} - Should this routine return nonzero, -1, or negative values - (e.g., to allow for implementation-defined error codes) on error? - Will slowing down the critical path of this routine by adding - input checking adversely affect its use? - \end{FeedbackRequest} - } - - \apinotes{ - None. - } - -\end{apidefinition} diff --git a/content/shmem_team_create_ctx.tex b/content/shmem_team_create_ctx.tex new file mode 100644 index 000000000..478165afe --- /dev/null +++ b/content/shmem_team_create_ctx.tex @@ -0,0 +1,55 @@ +\apisummary{ + Create a communication context collectively. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmem\_team\_create\_ctx}@(shmem_team_t team, long options, shmem_ctx_t *ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{team}{A handle to the specified \ac{PE} team.} + \apiargument{IN}{options}{ + The set of options requested for the given context. + Multiple options may be requested by combining them with a bitwise OR + operation; otherwise, \CONST{0} can be given if no options are requested.} + \apiargument{OUT}{ctx}{A handle to the newly created context.} +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_team\_create\_ctx} routine creates a new communication + context and returns its handle through the \VAR{ctx} argument. + This context is created collectively by all \acp{PE} in the team + specified by the \VAR{team} argument. + The specified team may not have the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} + option enabled; otherwise, the behavior is undefined. + + %% All \openshmem routines that operate on this context will do so with + %% respect to the associated \ac{PE} team. + %% That is, all point-to-point routines operating on this context will use + %% team-relative \ac{PE} numbering. 
+ + In addition to the team, the \FUNC{shmem\_team\_create\_ctx} routine accepts + the same arguments and provides all the same return conditions as the + \FUNC{shmem\_ctx\_create} routine. + The call is either collectively successful or collectively fails across + all \acp{PE} in the team. + + As \FUNC{shmem\_team\_create\_ctx} is collective, it includes a call to a + procedure semantically equivalent to \FUNC{shmem\_team\_sync} on both entry + and exit. +} + +\apireturnvalues{ + Zero on success and nonzero otherwise. +} + +\apinotes{ + Depending on the \openshmem implementation, system configuration, and + application communication pattern, some applications may observe higher + performance with collectively created contexts than with locally created + contexts. +} + +\end{apidefinition} diff --git a/content/shmem_team_destroy_ctx.tex b/content/shmem_team_destroy_ctx.tex new file mode 100644 index 000000000..40f14b060 --- /dev/null +++ b/content/shmem_team_destroy_ctx.tex @@ -0,0 +1,37 @@ +\apisummary{ + Destroy a collectively created communication context. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_team\_destroy\_ctx}@(shmem_ctx_t ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{Handle to the context that will be destroyed.} +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_team\_destroy\_ctx} collectively destroys a context that was + created by a call to \FUNC{shmem\_team\_create\_ctx}. + It is the user's responsibility to ensure that the context is not used + after it has been destroyed. + + As \FUNC{shmem\_team\_destroy\_ctx} is collective, it includes calls to + procedures semantically equivalent to \FUNC{shmem\_team\_barrier} on entry + and \FUNC{shmem\_team\_sync} on exit. + + It is invalid to pass \CONST{SHMEM\_CTX\_DEFAULT} or a context handle + returned by a call to \FUNC{shmem\_ctx\_create} to this routine. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. 
+} + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index fbf7e40db..82c6dac1c 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -119,7 +119,7 @@ \subsubsection{\textbf{SHPDEALLC}}\label{subsec:shpdeallc} \color{Green} -\subsection{Teams Management Routines}\label{subsec:team} +\subsection{Team Management Routines}\label{subsec:team} The \acp{PE} in an \openshmem program can communicate either using point-to-point routines that specify the \ac{PE} number of the target @@ -138,30 +138,31 @@ \subsection{Teams Management Routines}\label{subsec:team} Every team must have a least one member. Any attempt to create a team over an empty set of \acp{PE} will result in no new team being created. -A "team handle" is an opaque object with type \CTYPE{shmem\_team\_t} that is used +A ``team handle'' is an opaque object with type \CTYPE{shmem\_team\_t} that is used to reference a defined team. Team handles are created by one of the team split routines and destroyed by the team destroy routine. Team handles have local semantics only. That is, team handles should not be stored in shared variables and used across other \acp{PE}. Doing so will result in undefined behavior. By default, \openshmem creates predefined teams that will be available -for use once the routine \FUNC{shmem\_init} has been called. See section -\ref{subsec:library_handles} for a description of all predefined team handles +for use once the routine \FUNC{shmem\_init} has been called. See +Section~\ref{subsec:library_handles} for a description of all predefined team handles provided by \openshmem. Predefined \CTYPE{shmem\_team\_t} handles can be used as the parent team when creating new \openshmem teams. -Every \ac{PE} is a member of the \LibHandleRef{SHMEM\_TEAM\_WORLD} team, -and its number in \LibHandleRef{SHMEM\_TEAM\_WORLD} corresponds to the -value of its global \ac{PE} number as returned by \FUNC{shmem\_my\_pe}. 
+Every \ac{PE} is a member of the default team, which may be referenced +through the team handle \LibHandleRef{SHMEM\_TEAM\_WORLD}, +and its number in the default team is equal to the +value of its \ac{PE} number as returned by \FUNC{shmem\_my\_pe}. -A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, is used to +A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, may be used to indicate that a returned team handle is not valid. This value can be tested against to check for successful split operations and can be assigned to user declared team handles as a sentinel value. Teams may be created with options that change team behavior and may allow for -more optimized performance. These options are described in section -\ref{subsec:library_constants} and in the various descriptions of the +more optimized performance. These options are described in +Section~\ref{subsec:library_constants} and in the various descriptions of the team split routines below. In particular, teams may be created with an option to disable support for collective communications, which allows implementations to reduce team creation overheads for those teams. In that case, the team is @@ -213,19 +214,20 @@ \subsection{Communication Management Routines} \LibHandleRef{SHMEM\_CTX\_DEFAULT} handle. \newtext{ -Every communication context has an associated \ac{PE} team. -This \ac{PE} team specifies the set of \acp{PE} over which \ac{PE}-specific -routines that operate on a communication context, explicitly or implicitly, -(e.g., \ac{RMA} and \ac{AMO} routines) may be performed. -All \openshmem routines that operate on this context will do so with respect -to the team-relative \ac{PE} numbering of the associated \ac{PE} team. -} - -\newtext{ -The default context has a fixed association with the default team. -Communication contexts created by \FUNC{shmem\_ctx\_create} have an initial -association with the default team, which may be updated by the -\FUNC{shmem\_ctx\_set\_team} routine. 
+Every communication context is associated with a team. +This association is established at context creation. +Communication contexts created by \FUNC{shmem\_ctx\_create} are +associated with the default team, while contexts created by +\FUNC{shmem\_team\_create\_ctx} are associated with a team specified at +context creation. +The default context is associated with the default team. +A context's associated team specifies the set of \acp{PE} over which +\ac{PE}-specific routines that operate on a communication context, +explicitly or implicitly, are performed. +All point-to-point routines that operate on this context will do so with +respect to the team-relative \ac{PE} numbering of the associated team. +All collective routines that operate on this context will do so across +the associated team. } \subsubsection{\textbf{SHMEM\_CTX\_CREATE}} @@ -236,18 +238,24 @@ \subsubsection{\textbf{SHMEM\_CTX\_DESTROY}} \label{subsec:shmem_ctx_destroy} \input{content/shmem_ctx_destroy.tex} -\newtext{ -\subsubsection{\textbf{SHMEM\_CTX\_SET\_TEAM}} -\label{subsec:shmem_ctx_set_team} -\input{content/shmem_ctx_set_team.tex} -} - \newtext{ \subsubsection{\textbf{SHMEM\_CTX\_GET\_TEAM}} \label{subsec:shmem_ctx_get_team} \input{content/shmem_ctx_get_team.tex} } +\newtext{ +\subsubsection{\textbf{SHMEM\_TEAM\_CREATE\_CTX}} +\label{subsec:shmem_team_create_ctx} +\input{content/shmem_team_create_ctx.tex} +} + +\newtext{ +\subsubsection{\textbf{SHMEM\_TEAM\_DESTROY\_CTX}} +\label{subsec:shmem_team_destroy_ctx} +\input{content/shmem_team_destroy_ctx.tex} +} + \subsection{Remote Memory Access Routines}\label{sec:rma} \input{content/rma_intro.tex} From 6334ba52674d9d48ed0ad96bf563303ba8e48ba8 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Fri, 17 Aug 2018 14:21:32 -0400 Subject: [PATCH 046/319] Interim commit; integrating teams + contexts --- content/shmem_team_config_t.tex | 54 ++++++++++++ ..._options.tex => shmem_team_get_config.tex} | 30 +++---- content/shmem_team_split_strided.tex | 83 ++++++++++++------- main_spec.tex | 34 +++++--- utils/defs.tex | 6 +- 5 files changed, 142 insertions(+), 65 deletions(-) create mode 100644 content/shmem_team_config_t.tex rename content/{shmem_team_get_options.tex => shmem_team_get_config.tex} (50%) diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex new file mode 100644 index 000000000..cd309a5ed --- /dev/null +++ b/content/shmem_team_config_t.tex @@ -0,0 +1,54 @@ +\apisummary{ + A structure type representing team configuration arguments +} + +\begin{apidefinition} + + \begin{Csynopsis} +typedef struct { + int disable_collectives; + int return_local_limit; + int num_threads; +} shmem_team_config_t; +\end{Csynopsis} + + \vspace{1.0em} + + \apidescription{ + A team configuration argument acts as both input and output to the + \FUNC{shmem\_team\_split\_*} routines. + As an input, it specifies the requested capabilities of the team to be + created. + As an output, the configuration argument is conditionally updated on + whether team creation is successful. + If successful, the configuration argument is not modified; + if unsuccessful, it is updated to specify the limiting configuration + parameter(s). + + The \VAR{disable\_collectives} member allows for teams to be created + without support for collective communications, which allows implementations + to reduce team creation overheads for those teams. + When its value is zero, it specifies that the team should have collectives + enabled. + When nonzero, the team will not support collective operations, which + allows implementations to reduce team creation overheads. 
+ + The \VAR{return\_local\_limit} member controls whether, after a failed + team creation, the team configuration argument is updated with the + locally restrictive parameter(s) or the most restrictive parameter(s) + across the \acp{PE} of the new team. + When its value is zero, the most restrictive parameters are returned; + otherwise, the locally restrictive parameters are returned. + + The \VAR{num\_threads} member specifies the number of threads that will + create contexts from the new team. + It must have a nonnegative value. + See Section~\ref{sec:ctx} for more on communication contexts and + Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. + } + + \apinotes{ + None. + } + +\end{apidefinition} diff --git a/content/shmem_team_get_options.tex b/content/shmem_team_get_config.tex similarity index 50% rename from content/shmem_team_get_options.tex rename to content/shmem_team_get_config.tex index 2aac48f2f..1610e6832 100644 --- a/content/shmem_team_get_options.tex +++ b/content/shmem_team_get_config.tex @@ -1,26 +1,23 @@ \apisummary{ - Return the options flags describing the options applied to a given team + Return the configuration parameters of a given team } \begin{apidefinition} \begin{Csynopsis} -long @\FuncDecl{shmem\_team\_get\_options}@(shmem_team_t team); +void @\FuncDecl{shmem\_team\_get\_config}@(shmem_team_t team, shmem_team_config_t *config); \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team}{A valid \openshmem team handle.} + \apiargument{IN}{team}{A valid \openshmem team handle.} + \apiargument{OUT}{config}{ + A pointer to the configuration parameters of the given team.} \end{apiarguments} \apidescription{ -\FUNC{shmem\_team\_get\_options} returns a \CTYPE{long} value containing -all of the options which describe the given team. Options are requested when -new teams are created in the various \FUNC{shmem\_team\_split\_*} functions. -All of the requested options applied to the team by the library 
-All of the requested options applied to the team by the library -implementation will be returned by \FUNC{shmem\_team\_get\_options}. - -A library implementation will not apply any non-default options to a team, -other than those requested during team split functions. +\FUNC{shmem\_team\_get\_config} returns through the \VAR{config} argument +the configuration parameters of the given team, which were specified when the +team was created. \begin{FeedbackRequest} A library implementation must apply all requested options to a team, even in @@ -31,17 +28,14 @@ when it is requested, so that the \openshmem program will be portable across implementations. \end{FeedbackRequest} -All \acp{PE} in the team will get back the same value for the team options. +All \acp{PE} in the team will get back the same parameter values for the team options. -Error checking will be done to ensure a valid team handle is provided. -Errors will result in a return value less than \CONST{0}. +If the \VAR{team} argument does not specify a valid team, the behavior is +undefined. } \apireturnvalues{ -The set of options applied to the given team. Multiple options are combined -with a bitwise OR and can be extracted with a bitwise AND. A return value of -\CONST{0} implies that the team uses only default options. A return value less than -\CONST{0} implies that the team handle is invalid. + None. 
} \apinotes{ diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 1286555c4..2cca40aa7 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -6,8 +6,8 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, long options, -int PE_start, int PE_stride, int PE_size, shmem_team_t *new_team); +void @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, int PE_start, int PE_stride, + int PE_size, shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); \end{Csynopsis} \begin{apiarguments} @@ -15,9 +15,9 @@ \LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE} may be used, or any team created by the user.} -\apiargument{IN}{options}{The set of options requested for the new team. -Multiple options may be requested by combining them with a bitwise OR operation; -otherwise, \CONST{0} can be given if no options are requested.} +%% \apiargument{IN}{options}{The set of options requested for the new team. 
+%% Multiple options may be requested by combining them with a bitwise OR operation; +%% otherwise, \CONST{0} can be given if no options are requested.} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the subset of \acp{PE} from the parent team that will form the new team} @@ -29,6 +29,13 @@ \apiargument{IN}{PE\_size}{The number of \acp{PE} from the parent team in the subset of \acp{PE} that will form the new team.} +\apiargument{INOUT}{config}{ + A pointer to the configuration parameters for the new team.} + +\apiargument{IN}{config\_mask}{ + A bitwise mask representing the set of configuration parameters to use + from \VAR{config}.} + \apiargument{OUT}{new\_team}{A new \openshmem team handle, representing a \ac{PE} subset of all the \acp{PE} in the parent team that is created from the \ac{PE} triplet provided.} @@ -41,13 +48,6 @@ where the subset is defined by the \ac{PE} triplet (\VAR{PE\_start}, \VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the function. -It is important to note the use of the less restrictive -\VAR{PE\_stride} argument instead of \VAR{logPE\_stride}. This method of -creating a team with an arbitrary set of \acp{PE} is inherently restricted by -its parameters, but allows for many additional use-cases over using a -\VAR{logPE\_stride} parameter, and may provide an easier transition for -existing \openshmem programs to create and use \openshmem teams. - This function must be called by all processes contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the triplet specification, but for those processes a \VAR{new\_team} value of @@ -57,36 +57,55 @@ existing global \ac{PE} number. None of the parameters need to reside in symmetric memory. +The \VAR{config} argument specifies team configuration parameters, which are +described in Section~\ref{subsec:shmem_team_config_t}. 
+ +The \VAR{config\_mask} argument is a bitwise mask representing the set of +configuration parameters to use from \VAR{config}. +A \VAR{config\_mask} value of \CONST{0} indicates that all the field members +of \VAR{config} should be used. +Individual field masks can be combined through a bitwise OR operation +of the following library constants: + +{ + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ + The team should be created using the value of the + \VAR{disable\_collectives} member of the configuration parameter + \VAR{config}. + } + \apitablerow{\LibConstRef{SHMEM\_TEAM\_LOCAL\_LIMIT}}{ + The team should be created using the value of the + \VAR{return\_local\_limit} member of the configuration parameter + \VAR{config}. + } + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_THREADS}}{ + The team should be created using the value of the + \VAR{num\_threads} member of the configuration parameter \VAR{config}. + } +} + Error checking will be done to ensure a valid \ac{PE} triplet is provided, and also to determine whether a valid team handle is provided for the parent team. -\begin{FeedbackRequest} -All errors are considered fatal and will result in the job aborting with -an informative error message. -\end{FeedbackRequest} - -The following options can be supplied during team split to restrict -team functions and enable performance optimizations. When using a given -team, the application must comply with the requirements of all options -set on that team; otherwise, the behavior is undefined. -No options are enabled on \LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE}. - - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ - The new team will not be created with the necessary support - structures to enable team based collectives. - This will typically allow implementations to speed up team creation - and reduce \openshmem library footprint for teams with this option. 
- This option will not prevent the new team from using atomics or - other non-collective team based operations.} +If \VAR{new\_team} cannot be created, it will be assigned the value +\LibConstRef{SHMEM\_TEAM\_NULL}. } \apireturnvalues{ -None. + None. } \apinotes{ -See the description of team handles and predefined teams at the top of section \ref{subsec:team} for more information about semantics and usage. + It is important to note the use of the less restrictive + \VAR{PE\_stride} argument instead of \VAR{logPE\_stride}. This method of + creating a team with an arbitrary set of \acp{PE} is inherently restricted + by its parameters, but allows for many additional use-cases over using a + \VAR{logPE\_stride} parameter, and may provide an easier transition for + existing \openshmem programs to create and use \openshmem teams. + + See the description of team handles and predefined teams at the top of + Section~\ref{subsec:team} for more information about semantics and usage. } \begin{apiexamples} diff --git a/main_spec.tex b/main_spec.tex index 82c6dac1c..dd3b142bf 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -130,8 +130,8 @@ \subsection{Team Management Routines}\label{subsec:team} An \openshmem team is a set of \acp{PE} defined by calling a specific team split routine with a parent team argument and other arguments to further -specify how the parent team is to be split into one more new teams. -A team created by a team split routine can be used as the parent team +specify how the parent team is to be split into one or more new teams. +A team created by a \FUNC{shmem\_team\_split\_*} routine can be used as the parent team for a subsequent call to a team split routine. A team persists and can be used for multiple collective routine calls until it is destroyed by \FUNC{shmem\_team\_destroy}. 
@@ -160,14 +160,20 @@ \subsection{Team Management Routines}\label{subsec:team} against to check for successful split operations and can be assigned to user declared team handles as a sentinel value. -Teams may be created with options that change team behavior and may allow for -more optimized performance. These options are described in -Section~\ref{subsec:library_constants} and in the various descriptions of the -team split routines below. In particular, teams may be created with an option -to disable support for collective communications, which allows implementations -to reduce team creation overheads for those teams. In that case, the team is -just a local reindexing of some set of \acp{PE} that can be used for -point to point communications or as parent teams in subsequent split operations. +Teams that are created by a \FUNC{shmem\_team\_split\_*} routine may be +provided a configuration argument that specifies options that may affect +a team's capabilities and may allow for optimized performance. +This configuration argument is of type \CTYPE{shmem\_team\_config\_t}, which +is detailed further in Section~\ref{subsec:shmem_team_config_t}. + +%% Teams may be created with options that change team behavior and may allow for +%% more optimized performance. These options are described in +%% Section~\ref{subsec:library_constants} and in the various descriptions of the +%% team split routines below. In particular, teams may be created with an option +%% to disable support for collective communications, which allows implementations +%% to reduce team creation overheads for those teams. In that case, the team is +%% just a local reindexing of some set of \acp{PE} that can be used for +%% point to point communications or as parent teams in subsequent split operations. 
\subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} \input{content/shmem_team_my_pe.tex} @@ -175,8 +181,12 @@ \subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} \subsubsection{\textbf{SHMEM\_TEAM\_N\_PES}}\label{subsec:shmem_team_n_pes} \input{content/shmem_team_n_pes.tex} -\subsubsection{\textbf{SHMEM\_TEAM\_GET\_OPTIONS}}\label{subsec:shmem_team_get_options} -\input{content/shmem_team_get_options.tex} +\subsubsection{\textbf{SHMEM\_TEAM\_CONFIG\_T}} +\label{subsec:shmem_team_config_t} +\input{content/shmem_team_config_t.tex} + +\subsubsection{\textbf{SHMEM\_TEAM\_GET\_CONFIG}}\label{subsec:shmem_team_get_config} +\input{content/shmem_team_get_config.tex} \subsubsection{\textbf{SHMEM\_TEAM\_TRANSLATE}}\label{subsec:shmem_team_translate} \input{content/shmem_team_translate.tex} diff --git a/utils/defs.tex b/utils/defs.tex index c29f39c3f..ec99d05a3 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -417,7 +417,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, - shmem_team_t}, + shmem_team_t, shmem_team_config_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} @@ -425,7 +425,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - shmem_team_t}, + shmem_team_t, shmem_team_config_t}, aboveskip=0pt, belowskip=0pt}}{} @@ -435,7 +435,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - shmem_team_t}, + shmem_team_t, shmem_team_config_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} From b170fe45d20f76ca70370efec1869d117b74eeee Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Fri, 17 Aug 2018 15:35:30 -0400 Subject: [PATCH 047/319] Update team options -> config --- content/shmem_team_split_2d.tex | 118 ++++++++++++++++----------- content/shmem_team_split_strided.tex | 6 +- 2 files changed, 77 insertions(+), 47 deletions(-) diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index 8a8e9349f..f675c6f70 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -1,82 +1,108 @@ \apisummary{ Create two new teams by splitting an existing parent team into two subsets based on a -2D Cartesian space defined by xrange argument and the yrange derived from xrange -and the parent team size. These ranges describe the Cartesian space in X and Y -dimensions.} +2D Cartesian space defined by the \VAR{xrange} argument and a \VAR{y} dimension derived from \VAR{xrange} +and the parent team size. These ranges describe the Cartesian space in \emph{x}- +and \emph{y}-dimensions.} \begin{apidefinition} \begin{Csynopsis} void @\FuncDecl{shmem\_team\_split\_2d}@(shmem_team_t parent_team, int xrange, -long xaxis_options, shmem_team_t *xaxis_team, long yaxis_options, shmem_team_t *yaxis_team); + shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t *xaxis_team, + shmem_team_config_t *yaxis_config, long yaxis_mask, shmem_team_t *yaxis_team); \end{Csynopsis} \begin{apiarguments} \apiargument{IN}{parent\_team}{A valid \openshmem team. Any predefined teams, such as \LibHandleRef{SHMEM\_TEAM\_WORLD}, may be used, or any team created by the user.} -\apiargument{IN}{xrange}{A non-negative integer representing the number of +\apiargument{IN}{xrange}{A nonnegative integer representing the number of elements in the first dimension.} -\apiargument{IN}{xaxis\_options}{The set of options requested for the new X-axis team. 
-Multiple options may be requested by combining them with a bitwise OR operation; -otherwise, \CONST{0} can be given if no options are requested.} +\apiargument{INOUT}{xaxis\_config}{ + A pointer to the configuration parameters for the new \VAR{x}-axis team.} + +\apiargument{IN}{xaxis\_mask}{ + The bitwise mask representing the set of configuration parameters to use + from \VAR{xaxis\_config}.} \apiargument{OUT}{xaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} -subset consisting of all the \acp{PE} that have the same coordinate along the X-axis +subset consisting of all the \acp{PE} that have the same coordinate along the \VAR{x}-axis as the calling \ac{PE}.} -\apiargument{IN}{yaxis\_options}{The set of options requested for the new Y-axis team. -These options do not have to be the same as the options for the new X-axis team.} +\apiargument{INOUT}{yaxis\_config}{ + A pointer to the configuration parameters for the new \VAR{y}-axis team.} + +\apiargument{IN}{yaxis\_mask}{ + The bitwise mask representing the set of configuration parameters to use + from \VAR{yaxis\_config}.} \apiargument{OUT}{yaxis\_team}{A new \ac{PE} team handle representing a \ac{PE} -subset consisting of all the \acp{PE} that have the same coordinate along the Y-axis +subset consisting of all the \acp{PE} that have the same coordinate along the \VAR{y}-axis as the calling \ac{PE}.} \end{apiarguments} \apidescription{ The \FUNC{shmem\_team\_split\_2d} routine is a collective routine. It creates two new teams by splitting an existing parent team into up to two subsets based on a -2D Cartesian space. The user provides the size of the X dimension, which is then -used to derive the size of the Y dimension based on the size of the parent team. -The size of the Y dimension will be equal to $ceiling ( N \div xrange )$, where +2D Cartesian space. 
The user provides the size of the \VAR{x} dimension, which is then +used to derive the size of the \VAR{y} dimension based on the size of the parent team. +The size of the \VAR{y} dimension will be equal to $ceiling ( N \div xrange )$, where \VAR{N} is the size of the parent team. In other words, $xrange \times yrange \geq N$, so that every \ac{PE} in the parent team has a unique \VAR{(x,y)} location the 2D Cartesian space. After the split operation, each of the new teams will contain all \acp{PE} that -have the same coordinate along the X-axis and Y-axis, respectively, as the calling +have the same coordinate along the \VAR{x}-axis and \VAR{y}-axis, respectively, as the calling \ac{PE}. The \acp{PE} are numbered in the new teams based on the position of the \ac{PE} along the given axis. Any valid \openshmem team can be used as the parent team. This routine must be called by all \acp{PE} in the parent team. The value of \VAR{xrange} must be -non-negative and all \acp{PE} in the parent team must pass the same value for +nonnegative and all \acp{PE} in the parent team must pass the same value for \VAR{xrange}. None of the parameters need to reside in symmetric memory. -Error checking will be done to detect if the value \LibConstRef{SHMEM\_TEAM\_NULL} -is passed as the parent team handle. In that case, the program will abort with -an informative error message. If an invalid team handle is passed to the -routine, the behavior is undefined. - -The options in the table below can be supplied during team split to restrict -team functions and enable performance optimizations. All \acp{PE} that will -be in the same resultant new team must specify the same options. The \acp{PE} -in the parent team \emph{do not} have to all provide the same options for new -teams. - -When using a given team, the application must comply with the requirements -of all options set on that team; otherwise, the behavior is undefined. 
-No options are enabled on \LibHandleRef{SHMEM\_TEAM\_WORLD} or on other -predefined \openshmem teams. - - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ - The new team will not be created with the necessary support - structures to enable team based collectives. - This will typically allow implementations to speed up team creation - and reduce \openshmem library footprint for teams with this option. - This option will not prevent the new team from using atomics or - other non-collective team based operations.} +The \VAR{xaxis\_config} and \VAR{yaxis\_config} arguments specify team +configuration parameters for the \VAR{x}- and \VAR{y}-axis teams, respectively. +These parameters are described in Section~\ref{subsec:shmem_team_config_t}. +All \acp{PE} that will be in the same resultant team must specify the same +configuration parameters. +The \acp{PE} in the parent team \emph{do not} have to all provide the same +parameters for new teams. + +The \VAR{xaxis\_mask} and \VAR{yaxis\_mask} arguments are bitwise masks +representing the set of configuration parameters to use from +\VAR{xaxis\_config} and \VAR{yaxis\_config}, respectively. +A mask value of \CONST{0} indicates that all the field members of the +configuration parameter argument should be used. +Individual field masks can be combined through a bitwise OR operation +of the following library constants: + +{ + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ + The team should be created using the value of the + \VAR{disable\_collectives} member of the respective + configuration parameter. + } + \apitablerow{\LibConstRef{SHMEM\_TEAM\_LOCAL\_LIMIT}}{ + The team should be created using the value of the + \VAR{return\_local\_limit} member of the respective + configuration parameter. + } + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_THREADS}}{ + The team should be created using the value of the + \VAR{num\_threads} member of the respective + configuration parameter. 
+ } +} + +If \VAR{parent\_team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, both +\VAR{xaxis\_team} and \VAR{yaxis\_team} will be assigned the value +\LibConstRef{SHMEM\_TEAM\_NULL}. +Otherwise, if \VAR{parent\_team} is an invalid team handle, +the behavior is undefined. +If either team cannot be created, that team will be assigned the value +\LibConstRef{SHMEM\_TEAM\_NULL}. } \apireturnvalues{ @@ -86,13 +112,13 @@ \apinotes{ Since the split may result in a 2D space with more points than there are members of the parent team, there may be a final, incomplete row of the 2D mapping of the parent -team. This means that the resultant X-axis teams may vary in size by up to 1 \ac{PE}, -and that there may be one resultant Y-axis team of smaller size than all of the other -Y-axis teams. +team. This means that the resultant \VAR{x}-axis teams may vary in size by up to 1 \ac{PE}, +and that there may be one resultant \VAR{y}-axis team of smaller size than all of the other +\VAR{y}-axis teams. The following grid shows the 12 teams that would result from splitting a parent team of size 10 with \VAR{xrange} of 3. The numbers in the grid cells are the \ac{PE} numbers -in the parent team. The rows are the Y-axis teams. The columns are the X-axis teams. +in the parent team. The rows are the \VAR{y}-axis teams. The columns are the \VAR{x}-axis teams. \begin{center} \begin{tabular}{|l|l|l|l|} @@ -102,12 +128,12 @@ y=1 & 3 & 4 & 5 \\ \hline y=2 & 6 & 7 & 8 \\ \hline y=3 & 9 \\ - \cline{0-1} + \cline{0-1} \end{tabular} \end{center} It would be legal, for example, if \acp{PE} 0, 3, 6, 9 specified a different value -for \VAR{xaxis\_options} than all of the other \acp{PE}, as long as options match +for \VAR{xaxis\_config} than all of the other \acp{PE}, as long as the configuration parameters match for all \acp{PE} in each of the new teams. 
See the description of team handles and predefined teams at the top of section diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 2cca40aa7..e2e602755 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -33,7 +33,7 @@ A pointer to the configuration parameters for the new team.} \apiargument{IN}{config\_mask}{ - The a bitwise mask representing the set of configuration parameters to use + The bitwise mask representing the set of configuration parameters to use from \VAR{config}.} \apiargument{OUT}{new\_team}{A new \openshmem team handle, representing a \ac{PE} @@ -88,6 +88,10 @@ and also to determine whether a valid team handle is provided for the parent team. +If \VAR{parent\_team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, then +\VAR{new\_team} will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. +Otherwise, if \VAR{parent\_team} is an invalid team handle, +the behavior is undefined. If \VAR{new\_team} cannot be created, it will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. } From f630eaf183e374dde100c5069577310f4779b549 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Fri, 17 Aug 2018 15:37:14 -0400 Subject: [PATCH 048/319] Update team-based context creation/destruction - Team-based context creation is no longer collective - Team-based contexts can be destroyed by shmem_ctx_destroy - Contexts are not used for collectives --- content/shmem_ctx_destroy.tex | 10 ++++---- content/shmem_team_create_ctx.tex | 23 +++++++------------ content/shmem_team_destroy_ctx.tex | 37 ------------------------------ main_spec.tex | 20 +++++----------- 4 files changed, 19 insertions(+), 71 deletions(-) delete mode 100644 content/shmem_team_destroy_ctx.tex diff --git a/content/shmem_ctx_destroy.tex b/content/shmem_ctx_destroy.tex index ceb01194a..4eb595050 100644 --- a/content/shmem_ctx_destroy.tex +++ b/content/shmem_ctx_destroy.tex @@ -14,14 +14,14 @@ \apidescription{ \FUNC{shmem\_ctx\_destroy} destroys a context that was created by a call to - \FUNC{shmem\_ctx\_create}. It is the user's responsibility to ensure that + \FUNC{shmem\_ctx\_create} or \FUNC{shmem\_team\_create\_ctx}. + It is the user's responsibility to ensure that the context is not used after it has been destroyed, for example when the destroyed context is used by multiple threads. This function performs an implicit quiet operation on the given context before it is freed. - \newtext{% - It is invalid to pass \CONST{SHMEM\_CTX\_DEFAULT} or a context handle - returned by a call to \FUNC{shmem\_team\_create\_ctx} to this routine. + \newtext{ + If \VAR{ctx} is a handle to the default context, the behavior is undefined. } } @@ -30,7 +30,7 @@ } \apinotes{ - \oldtext{% + \oldtext{ It is invalid to pass \CONST{SHMEM\_CTX\_DEFAULT} to this routine. } diff --git a/content/shmem_team_create_ctx.tex b/content/shmem_team_create_ctx.tex index 478165afe..99f9dec53 100644 --- a/content/shmem_team_create_ctx.tex +++ b/content/shmem_team_create_ctx.tex @@ -1,5 +1,5 @@ \apisummary{ - Create a communication context collectively. + Create a communication context from a team. 
} \begin{apidefinition} @@ -20,10 +20,12 @@ \apidescription{ The \FUNC{shmem\_team\_create\_ctx} routine creates a new communication context and returns its handle through the \VAR{ctx} argument. - This context is created collectively by all \acp{PE} in the team - specified by the \VAR{team} argument. - The specified team may not have the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} - option enabled; otherwise, the behavior is undefined. + This context is created from the team specified by the \VAR{team} argument. + + The \FUNC{shmem\_team\_create\_ctx} routine must be called by no more threads + than were specified by the \VAR{num\_threads} member of the + \CTYPE{shmem\_team\_config\_t} configuration parameters that were specified + when the team was created. %% All \openshmem routines that operate on this context will do so with %% respect to the associated \ac{PE} team. @@ -33,12 +35,6 @@ In addition to the team, the \FUNC{shmem\_team\_create\_ctx} routine accepts the same arguments and provides all the same return conditions as the \FUNC{shmem\_ctx\_create} routine. - The call is either collectively successful or collectively fails across - all \acp{PE} in the team. - - As \FUNC{shmem\_team\_create\_ctx} is collective, it includes a call to a - procedure semantically equivalent to \FUNC{shmem\_team\_sync} on both entry - and exit. } \apireturnvalues{ @@ -46,10 +42,7 @@ } \apinotes{ - Depending on the \openshmem implementation, system configuration, and - application communication pattern, some applications may observe higher - performance with collectively created contexts than with locally created - contexts. + None. } \end{apidefinition} diff --git a/content/shmem_team_destroy_ctx.tex b/content/shmem_team_destroy_ctx.tex deleted file mode 100644 index 40f14b060..000000000 --- a/content/shmem_team_destroy_ctx.tex +++ /dev/null @@ -1,37 +0,0 @@ -\apisummary{ - Destroy a collectively created communication context. 
-} - -\begin{apidefinition} - -\begin{Csynopsis} -void @\FuncDecl{shmem\_team\_destroy\_ctx}@(shmem_ctx_t ctx); -\end{Csynopsis} - -\begin{apiarguments} - \apiargument{IN}{ctx}{Handle to the context that will be destroyed.} -\end{apiarguments} - -\apidescription{ - \FUNC{shmem\_team\_destroy\_ctx} collectively destroys a context that was - created by a call to \FUNC{shmem\_team\_create\_ctx}. - It is the user's responsibility to ensure that the context is not used - after it has been destroyed. - - As \FUNC{shmem\_team\_create\_ctx} is collective, it includes calls to - procedures semantically equivalent to \FUNC{shmem\_team\_barrier} on entry - and \FUNC{shmem\_team\_sync} on exit. - - It is invalid to pass \CONST{SHMEM\_CTX\_DEFAULT} or a context handle - returned by a call to \FUNC{shmem\_ctx\_create} to this routine. -} - -\apireturnvalues{ - None. -} - -\apinotes{ - None. -} - -\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index cb6f2b77e..588eae947 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -236,14 +236,18 @@ \subsection{Communication Management Routines} explicitly or implicitly, are performed. All point-to-point routines that operate on this context will do so with respect to the team-relative \ac{PE} numbering of the associated team. -All collective routines that operate on this context will do so across -the associated team. 
} \subsubsection{\textbf{SHMEM\_CTX\_CREATE}} \label{subsec:shmem_ctx_create} \input{content/shmem_ctx_create.tex} +\newtext{ +\subsubsection{\textbf{SHMEM\_TEAM\_CREATE\_CTX}} +\label{subsec:shmem_team_create_ctx} +\input{content/shmem_team_create_ctx.tex} +} + \subsubsection{\textbf{SHMEM\_CTX\_DESTROY}} \label{subsec:shmem_ctx_destroy} \input{content/shmem_ctx_destroy.tex} @@ -254,18 +258,6 @@ \subsubsection{\textbf{SHMEM\_CTX\_GET\_TEAM}} \input{content/shmem_ctx_get_team.tex} } -\newtext{ -\subsubsection{\textbf{SHMEM\_TEAM\_CREATE\_CTX}} -\label{subsec:shmem_team_create_ctx} -\input{content/shmem_team_create_ctx.tex} -} - -\newtext{ -\subsubsection{\textbf{SHMEM\_TEAM\_DESTROY\_CTX}} -\label{subsec:shmem_team_destroy_ctx} -\input{content/shmem_team_destroy_ctx.tex} -} - \subsection{Remote Memory Access Routines}\label{sec:rma} \input{content/rma_intro.tex} From 1f303e3b9159c1e83773d6e6d65fde4e9896eade Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Fri, 17 Aug 2018 16:06:00 -0400 Subject: [PATCH 049/319] Clarify barrier/sync on teams/contexts - Removes context-based barriers - Via revert "Initial draft of changes for barrier/sync on contexts/teams" - This reverts part of commit d417b246a6b043ae54eff4759056527cd3b2b6c3. - Keeps team-based sync - Adds note to shmem_barrier --- content/shmem_barrier.tex | 73 +++++++++------------------------------ content/shmem_sync.tex | 26 +++++--------- 2 files changed, 25 insertions(+), 74 deletions(-) diff --git a/content/shmem_barrier.tex b/content/shmem_barrier.tex index 9c0dd8100..ad02a8cbb 100644 --- a/content/shmem_barrier.tex +++ b/content/shmem_barrier.tex @@ -1,27 +1,13 @@ \apisummary{ Performs all operations described in the \FUNC{shmem\_barrier\_all} interface - but with respect to \newtext{a specified \ac{PE} team or} - a subset of \acp{PE} defined by \oldtext{the} \newtext{an} active set \newtext{triple}. + but with respect to a subset of \acp{PE} defined by the active set. 
} \begin{apidefinition} -{\color{ForestGreen} -\begin{C11synopsis} -void @\FuncDecl{shmem\_barrier}@(shmem_ctx_t ctx); -\end{C11synopsis} -} - -% These separate synopses should be merged when the color highlighting removed, -% preferably with shmem_ctx_barrier listed first. \begin{Csynopsis} void @\FuncDecl{shmem\_barrier}@(int PE_start, int logPE_stride, int PE_size, long *pSync); \end{Csynopsis} -{\color{ForestGreen} -\begin{CsynopsisCol} -void @\FuncDecl{shmem\_ctx\_barrier}@(shmem_ctx_t ctx); -\end{CsynopsisCol} -} \begin{Fsynopsis} INTEGER PE_start, logPE_stride, PE_size @@ -31,11 +17,6 @@ \begin{apiarguments} -\newtext{ -\apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on the - default context.} -} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} @@ -56,50 +37,22 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_barrier} is a collective synchronization routine over - \newtext {the \ac{PE} team associated with a communication context or} an active set. - Control returns from \FUNC{shmem\_barrier} after all \acp{PE} in - the \newtext{specified \ac{PE} team or} active set \oldtext{(specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and - \VAR{PE\_size})} have called \FUNC{shmem\_barrier}. - \newtext{ - An active set is specified by the triple of values: \VAR{PE\_start}, - \VAR{logPE\_stride}, and \VAR{PE\_size}. - } - - {\color{ForestGreen} - The \FUNC{shmem\_barrier} and \FUNC{shmem\_ctx\_barrier} routines that - accept a context handle \VAR{ctx} have the effect of a call to - \FUNC{shmem\_ctx\_quiet} on the specified context, followed by a call to - \FUNC{shmem\_team\_sync} on the team associated with the specified context. 
- - The \FUNC{shmem\_barrier} routine that accepts an active set triple - has the effect of a call to \FUNC{shmem\_quiet}, which implicitly - operates on the default context, followed by a call to \FUNC{shmem\_sync} - with the same active set and \VAR{pSync} arguments. - } - + \FUNC{shmem\_barrier} is a collective synchronization routine over an + active set. Control returns from \FUNC{shmem\_barrier} after all \acp{PE} in + the active set (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and + \VAR{PE\_size}) have called \FUNC{shmem\_barrier}. + As with all \openshmem collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the active set calls an \openshmem collective routine, the behavior is undefined. - - \begin{FeedbackRequest} - Do we need to say something like the above for team-based collectives? - We already say that teams are not portable across PEs, so only the PE - that created the team could invoke it in a collective without undefined - behavior. - - Should we have a harder partition between the description text between - the team- and active set-based API (since the active set and pSync - requirements don't affect the team call)? - \end{FeedbackRequest} The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. The same work array must be passed in \VAR{pSync} to all \acp{PE} in the active set. - \newtext{The} \FUNC{shmem\_barrier} \newtext{routines} ensure\oldtext{s} that all previously issued stores and remote - memory updates, including \acp{AMO} and \ac{RMA} operations, \newtext{issued} \oldtext{done} by any of the - \acp{PE} in the \newtext{\ac{PE} team or} active set on the default context are complete before returning. 
+ \FUNC{shmem\_barrier} ensures that all previously issued stores and remote + memory updates, including \acp{AMO} and \ac{RMA} operations, done by any of the + \acp{PE} in the active set on the default context are complete before returning. The same \VAR{pSync} array may be reused on consecutive calls to \FUNC{shmem\_barrier} if the same active set is used. @@ -126,6 +79,14 @@ Calls to \FUNC{shmem\_ctx\_quiet} can be performed prior to calling the barrier routine to ensure completion of operations issued on additional contexts. + + \newtext{ + No team-based barrier is provided by \openshmem, as a team may have any + number of communication contexts associated with the team. + Applications seeking such an idiom should call \FUNC{shmem\_ctx\_quiet} + on the desired context, followed by a call to \FUNC{shmem\_team\_sync} + on the desired team. + } } \begin{apiexamples} diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 93ab30e0e..1fa319d58 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,6 +1,7 @@ \apisummary{ Performs all operations described in the \FUNC{shmem\_sync\_all} interface - but with respect to a subset of \acp{PE} defined by the active set. + but with respect to a subset of \acp{PE} defined by \oldtext{the} + \newtext{a team or} active set. } \begin{apidefinition} @@ -24,8 +25,8 @@ \begin{apiarguments} -\newtext{ -\apiargument{IN}{team}{The \ac{PE} team over which to perform the operation.} +\newtext{% +\apiargument{IN}{team}{The team over which to perform the operation.}% } \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer.} @@ -43,35 +44,24 @@ \apidescription{ \FUNC{shmem\_sync} is a collective synchronization routine over - \newtext {a \ac{PE} team or} an active set. + \newtext {a team or} an active set. 
Control returns from \FUNC{shmem\_sync} after all \acp{PE} in - the \newtext{specified \ac{PE} team or} active set \oldtext{(specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and + the \newtext{specified team or} active set \oldtext{(specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size})} have called \FUNC{shmem\_sync}. \newtext{ An active set is specified by the triple of values: \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size}. } - As with all \openshmem collective routines, each of these routines assumes + As with all \oldtext{\openshmem} \newtext{active set-based} collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the active set calls an \openshmem collective routine, the behavior is undefined. - \begin{FeedbackRequest} - Do we need to say something like the above for team-based collectives? - We already say that teams are not portable across PEs, so only the PE - that created the team could invoke it in a collective without undefined - behavior. - - Should we have a harder partition between the description text between - the team- and active set-based API (since the active set and pSync - requirements don't affect the team call)? - \end{FeedbackRequest} - The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same work array must be passed in \VAR{pSync} to all \acp{PE} in the active set. - In contrast with the \FUNC{shmem\_barrier} routine\newtext{s}, \FUNC{shmem\_sync} only + In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only ensures completion and visibility of previously issued memory stores and does not ensure completion of remote memory updates issued via \openshmem routines. From ca524febcd08c269b0038e3b0e507e5f8dfadaae Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Fri, 17 Aug 2018 16:45:50 -0400 Subject: [PATCH 050/319] Add needed SHMEM_TEAM_* constants --- content/library_constants.tex | 28 +++++++++++++++++++++++++--- main_spec.tex | 4 ++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index f5fd29cba..fef9d3042 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -42,9 +42,31 @@ \color{Green} \LibConstDecl{SHMEM\_TEAM\_NOCOLLECTIVE} & \color{Green} -The team creation option which specifies that the new team will not -be initialized with support for team collective operations. -See Section~\ref{subsec:team} for more detail about its use. +The bitwise flag which specifies that a team creation routine should use the +\VAR{disable\_collectives} member of the provided +\CTYPE{shmem\_team\_config\_t} configuration parameter. +See Sections~\ref{subsec:shmem_team_config_t} and +\ref{subsec:shmem_team_split_strided} for more detail about its use. +\tabularnewline \hline +%% +\color{Green} +\LibConstDecl{SHMEM\_TEAM\_LOCAL\_LIMIT} & +\color{Green} +The bitwise flag which specifies that a team creation routine should use the +\VAR{return\_local\_limit} member of the provided +\CTYPE{shmem\_team\_config\_t} configuration parameter. +See Sections~\ref{subsec:shmem_team_config_t} and +\ref{subsec:shmem_team_split_strided} for more detail about its use. +\tabularnewline \hline +%% +\color{Green} +\LibConstDecl{SHMEM\_TEAM\_NUM\_THREADS} & +\color{Green} +The bitwise flag which specifies that a team creation routine should use the +\VAR{num\_threads} member of the provided +\CTYPE{shmem\_team\_config\_t} configuration parameter. +See Sections~\ref{subsec:shmem_team_config_t} and +\ref{subsec:shmem_team_split_strided} for more detail about its use. 
\tabularnewline \hline %% \color{Green} diff --git a/main_spec.tex b/main_spec.tex index 588eae947..bdc1d5ab0 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -228,8 +228,8 @@ \subsection{Communication Management Routines} This association is established at context creation. Communication contexts created by \FUNC{shmem\_ctx\_create} are associated with the default team, while contexts created by -\FUNC{shmem\_team\_create\_ctx} are associated with a team specified at -context creation. +\FUNC{shmem\_team\_create\_ctx} are associated with and created from a team +specified at context creation. The default context is associated with the default team. A context's associated team specifies the set of \acp{PE} over which \ac{PE}-specific routines that operate on a communication context, From 6384bc20912f6b0211e9f395d078d791e7fcb1df Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Wed, 12 Sep 2018 18:54:27 -0500 Subject: [PATCH 051/319] Add initial draft support for nbi fetching AMOs --- content/shmem_atomic_compare_swap_nbi.tex | 64 +++++++++++++++++++++++ content/shmem_atomic_fetch_add_nbi.tex | 62 ++++++++++++++++++++++ content/shmem_atomic_fetch_and_nbi.tex | 58 ++++++++++++++++++++ content/shmem_atomic_fetch_inc_nbi.tex | 57 ++++++++++++++++++++ content/shmem_atomic_fetch_nbi.tex | 53 +++++++++++++++++++ content/shmem_atomic_fetch_or_nbi.tex | 58 ++++++++++++++++++++ content/shmem_atomic_fetch_xor_nbi.tex | 58 ++++++++++++++++++++ content/shmem_atomic_swap_nbi.tex | 53 +++++++++++++++++++ main_spec.tex | 31 +++++++++++ 9 files changed, 494 insertions(+) create mode 100644 content/shmem_atomic_compare_swap_nbi.tex create mode 100644 content/shmem_atomic_fetch_add_nbi.tex create mode 100644 content/shmem_atomic_fetch_and_nbi.tex create mode 100644 content/shmem_atomic_fetch_inc_nbi.tex create mode 100644 content/shmem_atomic_fetch_nbi.tex create mode 100644 content/shmem_atomic_fetch_or_nbi.tex create mode 100644 content/shmem_atomic_fetch_xor_nbi.tex 
create mode 100644 content/shmem_atomic_swap_nbi.tex diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex new file mode 100644 index 000000000..6e1cc18dc --- /dev/null +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -0,0 +1,64 @@ +\color{Green} +\apisummary{ + This nonblocking routine performs an atomic conditional swap on a remote + data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_compare\_swap\_nbi}@(TYPE *fetch, TYPE *dest, TYPE cond, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_compare\_swap\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE cond, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{AMO} types specified by +Table~\ref{stdamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_compare\_swap\_nbi}@(TYPE *fetch, TYPE *dest, TYPE cond, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_compare\_swap\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE cond, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{stdamotypes}. + +\begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{fetch}{Local data object to be updated.} + \apiargument{OUT}{dest}{The remotely accessible integer data object to be + updated on the remote \ac{PE}. } + \apiargument{IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} + value. If \VAR{cond} and the remote \VAR{dest} are equal, then \VAR{value} + is swapped into the remote \VAR{dest}; otherwise, the remote \VAR{dest} is + unchanged. In either case, the old value of the remote \VAR{dest} is + fetched into the local \VAR{fetch} data object.
\VAR{cond} must be of the same data + type as \VAR{dest}.} + \apiargument{IN}{value}{The value to be atomically written to the remote + \ac{PE}. \VAR{value} must be the same data type as \VAR{dest}.} + \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which + \VAR{dest} is to be updated. When using \Fortran, it must be a default + integer value.} +\end{apiarguments} + +\apidescription{ + The nonblocking conditional swap routines conditionally update a \VAR{dest} + data object on the specified \ac{PE} and fetch the prior contents of the + data object into the \VAR{fetch} local data object as a + single atomic operation. This routine returns after posting the operation. + The operation is considered complete after a subsequent call to + \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the prior + contents of the \VAR{dest} data object are atomically fetched into the + \VAR{fetch} local data object and \VAR{value} + is conditionally written into \VAR{dest} on the remote \ac{PE}. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} +\color{Black} diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex new file mode 100644 index 000000000..55c266126 --- /dev/null +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -0,0 +1,62 @@ +\color{Green} +\apisummary{ + This nonblocking routine performs an atomic fetch-and-add operation on a + remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_fetch\_add\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_fetch\_add\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{AMO} types specified by +Table~\ref{stdamotypes}.
+ +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_add\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_add\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{stdamotypes}. + +\begin{apiarguments} + +\apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} +\apiargument{OUT}{fetch}{Local data object to be updated.} +\apiargument{OUT}{dest}{The remotely accessible integer data object to be updated on + the remote \ac{PE}. The type of \VAR{dest} should match that implied in the + SYNOPSIS section.} +\apiargument{IN}{value}{The value to be atomically added to \VAR{dest}. The + type of \VAR{value} should match that implied in the SYNOPSIS section.} +\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which + \VAR{dest} is to be updated. When using \Fortran, it must be a default + integer value.} + +\end{apiarguments} + +\apidescription{ + The nonblocking \FUNC{shmem\_atomic\_fetch\_add\_nbi} routines perform an + atomic fetch-and-add operation. An atomic fetch-and-add operation fetches + the old \VAR{dest} and adds \VAR{value} to \VAR{dest} without the + possibility of another atomic operation on the \VAR{dest} between the time + of the fetch and the update. This routine returns after posting the + operation. The operation is considered complete after a subsequent call to + \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, these + routines add \VAR{value} to \VAR{dest} on \VAR{pe} and fetch the previous + contents of \VAR{dest} as an atomic operation into the \VAR{fetch} local + data object. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. 
+} + +\end{apidefinition} +\color{Black} diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex new file mode 100644 index 000000000..b1893fb63 --- /dev/null +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -0,0 +1,58 @@ +\color{Green} +\apisummary{ + This nonblocking operation atomically performs a fetching bitwise AND + operation on a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_fetch\_and\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_fetch\_and\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_and\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_and\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{fetch}{Local data object to be updated.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise AND operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines atomically + perform a fetching bitwise AND on the remotely accessible data object pointed + to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}.
This routine + returns after posting the operation. The operation is considered complete + after a subsequent call to \FUNC{shmem\_quiet}. At the completion of + \FUNC{shmem\_quiet}, these routines atomically perform a fetching bitwise + AND on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the + previous contents of \VAR{dest} as an atomic operation into the \VAR{fetch} + local data object. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} +\color{Black} diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex new file mode 100644 index 000000000..40a1415e3 --- /dev/null +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -0,0 +1,57 @@ +\color{Green} +\apisummary{ + This nonblocking routine performs an atomic fetch-and-increment operation + on a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_fetch\_inc\_nbi}@(TYPE *fetch, TYPE *dest, int pe); +void @\FuncDecl{shmem\_atomic\_fetch\_inc\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{AMO} types specified by +Table~\ref{stdamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_inc\_nbi}@(TYPE *fetch, TYPE *dest, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_inc\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{stdamotypes}. + +\begin{apiarguments} + +\apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} +\apiargument{OUT}{fetch}{Local data object to be updated.} +\apiargument{OUT}{dest}{The remotely accessible integer data object to be updated + on the remote \ac{PE}.
The type of \dest{} should match that implied in the + SYNOPSIS section.} +\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which + \dest{} is to be updated. When using \Fortran, it must be a default + integer value.} + +\end{apiarguments} + + +\apidescription{ + These nonblocking routines perform an atomic fetch-and-increment operation. + This routine returns after posting the operation. The operation is considered + complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of + \FUNC{shmem\_quiet}, the \dest{} on \ac{PE} \VAR{pe} is increased by one and + the routine fetches the previous contents of \dest{} as an atomic operation + into the \VAR{fetch} local data object. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} +\color{Black} diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex new file mode 100644 index 000000000..6ef45319b --- /dev/null +++ b/content/shmem_atomic_fetch_nbi.tex @@ -0,0 +1,53 @@ +\color{Green} +\apisummary{ + The nonblocking atomic fetch routine provides an option for atomically + fetching the value of a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_fetch\_nbi}@(TYPE *fetch, const TYPE *source, int pe); +void @\FuncDecl{shmem\_atomic\_fetch\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, const TYPE *source, int pe); +\end{C11synopsis} +where \TYPE{} is one of the extended \ac{AMO} types specified by +Table~\ref{extamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_nbi}@(TYPE *fetch, const TYPE *source, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, const TYPE *source, int pe); +\end{Csynopsis} +where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{extamotypes}.
+ +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{fetch}{Local data object to be updated.} + \apiargument{IN}{source}{The remotely accessible data object to be fetched from + the remote \ac{PE}.} + \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number from which + \VAR{source} is to be fetched.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_fetch\_nbi} performs a nonblocking atomic fetch + operation. This routine returns after posting the operation. The operation + is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At + the completion of \FUNC{shmem\_quiet}, the contents of the \source{} data object + on \ac{PE} \VAR{pe} are atomically fetched into the \VAR{fetch} local data object. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} +\color{Black} diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex new file mode 100644 index 000000000..4f4a44997 --- /dev/null +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -0,0 +1,58 @@ +\color{Green} +\apisummary{ + This nonblocking operation atomically performs a fetching bitwise OR + operation on a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_fetch\_or\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_fetch\_or\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}.
+ +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_or\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_or\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{fetch}{Local data object to be updated.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise OR operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines atomically + perform a fetching bitwise OR on the remotely accessible data object pointed + to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine + returns after posting the operation. The operation is considered complete + after a subsequent call to \FUNC{shmem\_quiet}. At the completion of + \FUNC{shmem\_quiet}, these routines atomically perform a fetching bitwise + OR on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the + previous contents of \VAR{dest} as an atomic operation into the \VAR{fetch} + local data object. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None.
+} + +\end{apidefinition} +\color{Black} diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex new file mode 100644 index 000000000..ed5ac57e2 --- /dev/null +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -0,0 +1,58 @@ +\color{Green} +\apisummary{ + This nonblocking operation atomically performs a fetching bitwise XOR + operation on a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_fetch\_xor\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_fetch\_xor\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_xor\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_xor\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{fetch}{Local data object to be updated.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise XOR operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines atomically + perform a fetching bitwise XOR on the remotely accessible data object pointed + to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}.
This routine + returns after posting the operation. The operation is considered complete + after a subsequent call to \FUNC{shmem\_quiet}. At the completion of + \FUNC{shmem\_quiet}, these routines atomically perform a fetching bitwise + XOR on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the + previous contents of \VAR{dest} as an atomic operation into the \VAR{fetch} + local data object. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} +\color{Black} diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex new file mode 100644 index 000000000..5cfcaac41 --- /dev/null +++ b/content/shmem_atomic_swap_nbi.tex @@ -0,0 +1,53 @@ +\color{Green} +\apisummary{ + This nonblocking operation performs an atomic swap to a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_swap\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_swap\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the extended \ac{AMO} types specified by Table~\ref{extamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_swap\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_swap\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table~\ref{extamotypes}. + +\begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{fetch}{Local data object to be updated.} + \apiargument{OUT}{dest}{The remotely accessible integer data object to be + updated on the remote \ac{PE}.
When using \CorCpp, the type of + \dest{} should match that implied in the SYNOPSIS section.} + \apiargument{IN}{value}{The value to be atomically written to the remote + \ac{PE}. \VAR{value} is the same type as \dest.} + \apiargument{IN}{pe}{ An integer that indicates the \ac{PE} number on which + \dest{} is to be updated. When using \Fortran, it must be a default + integer value.} +\end{apiarguments} + +\apidescription{ + The nonblocking \FUNC{shmem\_atomic\_swap\_nbi} routine performs an atomic swap + operation. This routine returns after posting the operation. The operation + is considered complete after a subsequent call to \FUNC{shmem\_quiet}. + At the completion of \FUNC{shmem\_quiet}, it writes \VAR{value} into + \dest{} on \ac{PE} \VAR{pe} and fetches the previous contents of \dest{} as an atomic + operation into the \VAR{fetch} local data object. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} +\color{Black} diff --git a/main_spec.tex b/main_spec.tex index 6c2c46596..4b824f7da 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -235,8 +235,39 @@ \subsubsection{\textbf{SHMEM\_ATOMIC\_XOR}} \input{content/shmem_atomic_xor.tex} +\subsection{Non-blocking Atomic Memory Operations}\label{sec:amo} +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_NBI}} +\label{subsec:shmem_atomic_fetch_nbi} +\input{content/shmem_atomic_fetch_nbi.tex} +\subsubsection{\textbf{SHMEM\_ATOMIC\_COMPARE\_SWAP\_NBI}} +\label{subsec:shmem_atomic_compare_swap_nbi} +\input{content/shmem_atomic_compare_swap_nbi.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_SWAP\_NBI}} +\label{subsec:shmem_atomic_swap_nbi} +\input{content/shmem_atomic_swap_nbi.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_INC\_NBI}} +\label{subsec:shmem_atomic_fetch_inc_nbi} +\input{content/shmem_atomic_fetch_inc_nbi.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_ADD\_NBI}} +\label{subsec:shmem_atomic_fetch_add_nbi} +\input{content/shmem_atomic_fetch_add_nbi.tex} +
+\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_AND\_NBI}} +\label{subsec:shmem_atomic_fetch_and_nbi} +\input{content/shmem_atomic_fetch_and_nbi.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_OR\_NBI}} +\label{subsec:shmem_atomic_fetch_or_nbi} +\input{content/shmem_atomic_fetch_or_nbi.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_XOR\_NBI}} +\label{subsec:shmem_atomic_fetch_xor_nbi} +\input{content/shmem_atomic_fetch_xor_nbi.tex} \subsection{Collective Routines}\label{subsec:coll} \input{content/collective_intro.tex} From 6dd8427bbdc4e3fb41360bcfba29d85bf8ad69e8 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 19 Sep 2018 10:42:38 -0500 Subject: [PATCH 052/319] Added library handle description of team shared --- content/library_handles.tex | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/content/library_handles.tex b/content/library_handles.tex index ca7b8e606..9ccbd2522 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -24,12 +24,14 @@ \tabularnewline \hline %% \color{Green} -\LibHandleDecl{SHMEM\_TEAM\_NODE} & +\LibHandleDecl{SHMEM\_TEAM\_SHARED} & \color{Green} Handle of type \CTYPE{shmem\_team\_t} that corresponds a team of \acp{PE} -which share node level resources, such as shared memory, network -interfaces, etc. When this handle is used by some \ac{PE}, it will refer -to the node level team containing that \ac{PE}. +that share a memory domain. When this handle is used by some \ac{PE}, +it will refer to the team of all \acp{PE} that would return a non-null +pointer from \FUNC{shmem\_ptr} for symmetric objects on that \ac{PE}, +and vice versa. This means that symmetric objects on each \ac{PE} are +directly load/store accessible by all \acp{PE} in the team. See Section~\ref{subsec:team} for more detail about its use. 
\tabularnewline \hline %% From 737b476fea89557fd9299d06bcf6bc7a59df1978 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 19 Sep 2018 10:43:23 -0500 Subject: [PATCH 053/319] Update parent team arg description in split strided --- content/shmem_team_split_strided.tex | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index e2e602755..357d5785f 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -11,13 +11,8 @@ \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{parent\_team}{A valid SHMEM team. The predefined teams -\LibHandleRef{SHMEM\_TEAM\_WORLD} or \LibHandleRef{SHMEM\_TEAM\_NODE} may -be used, or any team created by the user.} - -%% \apiargument{IN}{options}{The set of options requested for the new team. -%% Multiple options may be requested by combining them with a bitwise OR operation; -%% otherwise, \CONST{0} can be given if no options are requested.} +\apiargument{IN}{parent\_team}{A valid \openshmem team. 
The predefined teams, such as +\LibHandleRef{SHMEM\_TEAM\_WORLD}, may be used, or any team created by the user.} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the subset of \acp{PE} from the parent team that will form the new team} From fc2fba379efe67d7fe729a6d12491f2c64974168 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 20 Sep 2018 10:55:09 -0500 Subject: [PATCH 054/319] Update return values and error checking on core team routines --- content/shmem_team_destroy.tex | 20 ++++++++++++-------- content/shmem_team_my_pe.tex | 9 +++------ content/shmem_team_n_pes.tex | 9 +++------ content/shmem_team_split_2d.tex | 16 +++++++++------- content/shmem_team_split_strided.tex | 18 +++++++++--------- content/shmem_team_translate.tex | 18 ++++++------------ 6 files changed, 42 insertions(+), 48 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 841344590..12f678294 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -5,11 +5,11 @@ \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_team\_destroy}@(shmem_team_t team); +int @\FuncDecl{shmem\_team\_destroy}@(shmem_team_t *team); \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team}{A valid \openshmem team handle.} +\apiargument{INOUT}{team}{A pointer to a valid \openshmem team handle.} \end{apiarguments} \apidescription{ @@ -20,16 +20,20 @@ handle can no longer be used for team API calls. It is considered erroneous to free \LibHandleRef{SHMEM\_TEAM\_WORLD} or -any other predefined team. Error checking will be done to ensure a valid -team handle is provided. Errors will result in a return value less than \CONST{0}. +any other predefined team. + +If a pointer to an invalid handle is provided, behavior is undefined. + +If the pointer to \VAR{team} is a null pointer, no team is destroyed, +and a nonzero value is returned. 
+ +After returning from the routine, if the team was successfully destroyed, +the handle will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. } -\begin{FeedbackRequest} \apireturnvalues{ -On success, the function will return 0. Otherwise a value less than -\CONST{0} will be returned. +Zero upon successful destruction of the team, nonzero otherwise. } -\end{FeedbackRequest} \apinotes{ None. diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index 1ffa4d1cc..9974182d2 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -19,16 +19,13 @@ For the team \LibHandleRef{SHMEM\_TEAM\_WORLD}, this will return the same value as \FUNC{shmem\_my\_pe}. -Error checking will be done to ensure a valid team handle is provided. -Errors will result in a return value less than \CONST{0}. +If the provided team handle is invalid, behavior is undefined. } -\begin{FeedbackRequest} \apireturnvalues{ -The number of the calling \ac{PE} within the provided team, or a value less than -\CONST{0} if the team handle is invalid. +The number of the calling \ac{PE} within the provided team, or the value +\CONST{-1} if the provided team is \LibConstRef{SHMEM\_TEAM\_NULL}. } -\end{FeedbackRequest} \apinotes{ None. diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index 6b30a8a0c..11ec377cc 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -21,16 +21,13 @@ All \acp{PE} in the team will get back the same value for the team size. -Error checking will be done to ensure a valid team handle is provided. -Errors will result in a return value less than \CONST{0}. +If the provided team handle is invalid, behavior is undefined. } -\begin{FeedbackRequest} \apireturnvalues{ -Total number of \acp{PE} in the provided team, or a value less than -\CONST{0} if the team handle is invalid. 
+Total number of \acp{PE} in the provided team, or the value +\CONST{-1} if the provided team is \LibConstRef{SHMEM\_TEAM\_NULL}. } -\end{FeedbackRequest} \apinotes{ None. diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index f675c6f70..a16bb2ee4 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -7,7 +7,7 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_team\_split\_2d}@(shmem_team_t parent_team, int xrange, +int @\FuncDecl{shmem\_team\_split\_2d}@(shmem_team_t parent_team, int xrange, shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t *xaxis_team, shmem_team_config_t *yaxis_config, long yaxis_mask, shmem_team_t *yaxis_team); \end{Csynopsis} @@ -96,17 +96,19 @@ } } -If \VAR{parent\_team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, both -\VAR{xaxis\_team} and \VAR{yaxis\_team} will be assigned the value -\LibConstRef{SHMEM\_TEAM\_NULL}. -Otherwise, if \VAR{parent\_team} is an invalid team handle, -the behavior is undefined. +If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. + +If \VAR{parent\_team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, no new +teams will be created, and both \VAR{xaxis\_team} and \VAR{yaxis\_team} +will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. + If either team cannot be created, that team will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. } \apireturnvalues{ -None. + Zero on successful creation of both \VAR{xaxis\_team} and \VAR{yaxis\_team}, + nonzero otherwise. 
} \apinotes{ diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index e2e602755..03ad59d66 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -6,7 +6,7 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, int PE_start, int PE_stride, +int @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, int PE_start, int PE_stride, int PE_size, shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); \end{Csynopsis} @@ -84,20 +84,20 @@ } } -Error checking will be done to ensure a valid \ac{PE} triplet is provided, -and also to determine whether a valid team handle is provided for the -parent team. +If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. + +If \VAR{parent\_team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, then no +new team will be created, and \VAR{new\_team} will be assigned the value +\LibConstRef{SHMEM\_TEAM\_NULL}. + +If an invalid \ac{PE} triplet is provided, the \VAR{new\_team} will not be created. -If \VAR{parent\_team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, then -\VAR{new\_team} will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. -Otherwise, if \VAR{parent\_team} is an invalid team handle, -the behavior is undefined. If \VAR{new\_team} cannot be created, it will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. } \apireturnvalues{ - None. + Zero on successful creation of \VAR{new\_team}, nonzero otherwise. } \apinotes{ diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex index d4e5f2105..c1edff0af 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate.tex @@ -20,25 +20,19 @@ to the corresponding \ac{PE} number in another team. Specifically, given the \VAR{src\_pe} in \VAR{src\_team}, this function returns that \ac{PE}'s number in \VAR{dest\_team}. 
If \VAR{src\_pe} is not a member of both the -\VAR{src\_team} and \VAR{dest\_team}, a value less than \CONST{0} is returned. +\VAR{src\_team} and \VAR{dest\_team}, a value of \CONST{-1} is returned. -If \LibHandleRef{SHMEM\_TEAM\_WORLD} is provided as the \VAR{dest\_team} parameter, this function -acts as a global \ac{PE} number translator and will return the corresponding -\LibHandleRef{SHMEM\_TEAM\_WORLD} number. This may be useful when performing point-to- -point operations between \acp{PE} in a subset, as point-to-point operations -that do not take a context argument require the global \LibHandleRef{SHMEM\_TEAM\_WORLD} -\ac{PE} number. +If \LibHandleRef{SHMEM\_TEAM\_WORLD} is provided as the \VAR{dest\_team} parameter, +this function acts as a global \ac{PE} number translator and will return the corresponding +\LibHandleRef{SHMEM\_TEAM\_WORLD} number. -Error checking will be done to ensure valid team handles are provided. -Errors will result in a return value less than \CONST{0}. +If either of the \VAR{src\_team} or \VAR{dest\_team} handle is invalid, behavior is undefined. } -\begin{FeedbackRequest} \apireturnvalues{ -The specified \ac{PE}'s number in the \VAR{dest\_team}, or a value less than \CONST{0} if any +The specified \ac{PE}'s number in the \VAR{dest\_team}, or a value of \CONST{-1} if any team handle arguments are invalid or the \VAR{src\_pe} is not in both the source and destination teams. } -\end{FeedbackRequest} \apinotes{ None. 
From 9f00e4f898749b233935923b3a8d36756d98fd15 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 24 Sep 2018 12:26:49 -0500 Subject: [PATCH 055/319] Updates to grammar and usage --- content/shmem_team_destroy.tex | 6 +++--- content/shmem_team_my_pe.tex | 6 +++--- content/shmem_team_n_pes.tex | 8 ++++---- content/shmem_team_split_2d.tex | 2 +- content/shmem_team_split_strided.tex | 16 ++++++++-------- content/shmem_team_translate.tex | 8 ++++---- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 12f678294..e942d133d 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -13,7 +13,7 @@ \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_destroy} function destroys an existing team. This is a +The \FUNC{shmem\_team\_destroy} routine destroys an existing team. This is a collective call, in which every member of the team being destroyed needs to participate. This will free all internal memory structures associated with the team and invalidate the team handle. Upon return, the team @@ -22,9 +22,9 @@ It is considered erroneous to free \LibHandleRef{SHMEM\_TEAM\_WORLD} or any other predefined team. -If a pointer to an invalid handle is provided, behavior is undefined. +If a pointer to an invalid handle is provided, the behavior is undefined. -If the pointer to \VAR{team} is a null pointer, no team is destroyed, +If the pointer to \VAR{team} is a null pointer, then no team is destroyed, and a nonzero value is returned. 
After returning from the routine, if the team was successfully destroyed, diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index 9974182d2..43d41aa5a 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -13,18 +13,18 @@ \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_my\_pe} function returns the number of calling \ac{PE} within the +The \FUNC{shmem\_team\_my\_pe} routine returns the number of calling \ac{PE} within the provided team. The number will be a value between 0 and N-1, for a team of size N. Each member of the team has a unique number. For the team \LibHandleRef{SHMEM\_TEAM\_WORLD}, this will return the same value as \FUNC{shmem\_my\_pe}. -If the provided team handle is invalid, behavior is undefined. +If the provided team handle is invalid, the behavior is undefined. } \apireturnvalues{ The number of the calling \ac{PE} within the provided team, or the value -\CONST{-1} if the provided team is \LibConstRef{SHMEM\_TEAM\_NULL}. +\CONST{-1} if the provided team compares equal to \LibConstRef{SHMEM\_TEAM\_NULL}. } \apinotes{ diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index 11ec377cc..f4dc45755 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -13,20 +13,20 @@ \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_n\_pes} function returns the number of \acp{PE} in the +The \FUNC{shmem\_team\_n\_pes} routine returns the number of \acp{PE} in the team. This will always be a value between 1 and N, where N is the total number of \acp{PE} accessible to the \openshmem program. For the team -\LibHandleRef{SHMEM\_TEAM\_WORLD}, this will return the same value as +\LibHandleRef{SHMEM\_TEAM\_WORLD}, this routine will return the same value as \FUNC{shmem\_n\_pes}. All \acp{PE} in the team will get back the same value for the team size. -If the provided team handle is invalid, behavior is undefined. 
+If the provided team handle is invalid, the behavior is undefined. } \apireturnvalues{ Total number of \acp{PE} in the provided team, or the value -\CONST{-1} if the provided team is \LibConstRef{SHMEM\_TEAM\_NULL}. +\CONST{-1} if the provided team compares equal to \LibConstRef{SHMEM\_TEAM\_NULL}. } \apinotes{ diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index a16bb2ee4..639d044c3 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -98,7 +98,7 @@ If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. -If \VAR{parent\_team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, no new +If \VAR{parent\_team} compares equal to \LibConstRef{SHMEM\_TEAM\_NULL}, no new teams will be created, and both \VAR{xaxis\_team} and \VAR{yaxis\_team} will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 03ad59d66..304c45a66 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -1,7 +1,7 @@ \apisummary{ Create a new \openshmem team from a subset of the existing parent team \acp{PE}, where the subset is defined by the -\ac{PE} triplet (\VAR{PE\_start}, \VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the function.} +\ac{PE} triplet (\VAR{PE\_start}, \VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the routine.} \begin{apidefinition} @@ -43,16 +43,16 @@ \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_split\_strided} function is a collective routine. +The \FUNC{shmem\_team\_split\_strided} routine is a collective routine. It creates a new \openshmem team from a subset of the existing parent team, where the subset is defined by the \ac{PE} triplet (\VAR{PE\_start}, -\VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the function. +\VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the routine. 
-This function must be called by all processes contained in the \ac{PE} triplet +This routine must be called by all processes contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the triplet specification, but for those processes a \VAR{new\_team} value of \LibConstRef{SHMEM\_TEAM\_NULL} is returned. All calling processes must provide the -same values for the \ac{PE} triplet. This function will return a \VAR{new\_team} +same values for the \ac{PE} triplet. This routine will return a \VAR{new\_team} containing the \ac{PE} subset specified by the triplet, and ordered by the existing global \ac{PE} number. None of the parameters need to reside in symmetric memory. @@ -86,13 +86,13 @@ If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. -If \VAR{parent\_team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, then no +If \VAR{parent\_team} compares equal to \LibConstRef{SHMEM\_TEAM\_NULL}, then no new team will be created, and \VAR{new\_team} will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. -If an invalid \ac{PE} triplet is provided, the \VAR{new\_team} will not be created. +If an invalid \ac{PE} triplet is provided, then the \VAR{new\_team} will not be created. -If \VAR{new\_team} cannot be created, it will be assigned the value +If \VAR{new\_team} cannot be created, then it will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. } diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex index c1edff0af..b9941ae7f 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate.tex @@ -16,17 +16,17 @@ \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_translate\_pe} function will translate a given \ac{PE} number +The \FUNC{shmem\_team\_translate\_pe} routine will translate a given \ac{PE} number to the corresponding \ac{PE} number in another team. 
-Specifically, given the \VAR{src\_pe} in \VAR{src\_team}, this function returns that +Specifically, given the \VAR{src\_pe} in \VAR{src\_team}, this routine returns that \ac{PE}'s number in \VAR{dest\_team}. If \VAR{src\_pe} is not a member of both the \VAR{src\_team} and \VAR{dest\_team}, a value of \CONST{-1} is returned. If \LibHandleRef{SHMEM\_TEAM\_WORLD} is provided as the \VAR{dest\_team} parameter, -this function acts as a global \ac{PE} number translator and will return the corresponding +this routine acts as a global \ac{PE} number translator and will return the corresponding \LibHandleRef{SHMEM\_TEAM\_WORLD} number. -If either of the \VAR{src\_team} or \VAR{dest\_team} handle is invalid, behavior is undefined. +If either of the \VAR{src\_team} or \VAR{dest\_team} handle is invalid, the behavior is undefined. } \apireturnvalues{ From 9664170b6625a2753cca8748cb036f8ba6298c93 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 26 Sep 2018 10:31:03 -0500 Subject: [PATCH 056/319] Move teams intro text into new file --- .gitignore | 1 + content/shmem_teams_intro.tex | 54 ++++++++++++++++++++++++++++++++++ main_spec.tex | 55 +---------------------------------- 3 files changed, 56 insertions(+), 54 deletions(-) create mode 100644 content/shmem_teams_intro.tex diff --git a/.gitignore b/.gitignore index 721f3a3f6..1b1bc10ab 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ main_spec.log main_spec.out main_spec.pdf main_spec.toc +*~ \ No newline at end of file diff --git a/content/shmem_teams_intro.tex b/content/shmem_teams_intro.tex new file mode 100644 index 000000000..23555e8a0 --- /dev/null +++ b/content/shmem_teams_intro.tex @@ -0,0 +1,54 @@ +The \acp{PE} in an \openshmem program can communicate either using +point-to-point routines that specify the \ac{PE} number of the target +\ac{PE} or using collective routines which operate over some predefined +set of \acp{PE}. 
Teams in \openshmem allow programs to group subsets +of \acp{PE} for collective communications and provide a contiguous reindexing +of the \acp{PE} within that subset that can be used in point-to-point communication. + +An \openshmem team is a set of \acp{PE} defined by calling a specific team +split routine with a parent team argument and other arguments to further +specify how the parent team is to be split into one or more new teams. +A team created by a \FUNC{shmem\_team\_split\_*} routine can be used as the parent team +for a subsequent call to a team split routine. A team persists and can +be used for multiple collective routine calls until it is destroyed by +\FUNC{shmem\_team\_destroy}. + +Every team must have at least one member. Any attempt to create a team over an +empty set of \acp{PE} will result in no new team being created. + +A ``team handle'' is an opaque object with type \CTYPE{shmem\_team\_t} that is used +to reference a defined team. Team handles are created by one of the team split +routines and destroyed by the team destroy routine. Team handles have local +semantics only. That is, team handles should not be stored in shared variables +and used across other \acp{PE}. Doing so will result in undefined behavior. + +By default, \openshmem creates predefined teams that will be available +for use once the routine \FUNC{shmem\_init} has been called. See +Section~\ref{subsec:library_handles} for a description of all predefined team handles +provided by \openshmem. Predefined \CTYPE{shmem\_team\_t} handles can be used as +the parent team when creating new \openshmem teams. + +Every \ac{PE} is a member of the default team, which may be referenced +through the team handle \LibHandleRef{SHMEM\_TEAM\_WORLD}, +and its number in the default team is equal to the +value of its \ac{PE} number as returned by \FUNC{shmem\_my\_pe}. + +A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, may be used to +indicate that a returned team handle is not valid.
This value can be tested +against to check for successful split operations and can be assigned to user +declared team handles as a sentinel value. + +Teams that are created by a \FUNC{shmem\_team\_split\_*} routine may be +provided a configuration argument that specifies options that may affect +a team's capabilities and may allow for optimized performance. +This configuration argument is of type \CTYPE{shmem\_team\_config\_t}, which +is detailed further in Section~\ref{subsec:shmem_team_config_t}. + +%% Teams may be created with options that change team behavior and may allow for +%% more optimized performance. These options are described in +%% Section~\ref{subsec:library_constants} and in the various descriptions of the +%% team split routines below. In particular, teams may be created with an option +%% to disable support for collective communications, which allows implementations +%% to reduce team creation overheads for those teams. In that case, the team is +%% just a local reindexing of some set of \acp{PE} that can be used for +%% point to point communications or as parent teams in subsequent split operations. diff --git a/main_spec.tex b/main_spec.tex index e268f7e8e..d03a3ff5f 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -120,60 +120,7 @@ \subsubsection{\textbf{SHPDEALLC}}\label{subsec:shpdeallc} \color{Green} \subsection{Team Management Routines}\label{subsec:team} - -The \acp{PE} in an \openshmem program can communicate either using -point-to-point routines that specify the \ac{PE} number of the target -\ac{PE} or using collective routines which operate over some predefined -set of \acp{PE}. Teams in \openshmem allow programs to group subsets -of \acp{PE} for collective communications and provide a contiguous reindexing -of the \acp{PE} within that subset that can be used in point-to-point communication. 
- -An \openshmem team is a set of \acp{PE} defined by calling a specific team -split routine with a parent team argument and other arguments to further -specify how the parent team is to be split into one or more new teams. -A team created by a \FUNC{shmem\_team\_split\_*} routine can be used as the parent team -for a subsequent call to a team split routine. A team persists and can -be used for multiple collective routine calls until it is destroyed by -\FUNC{shmem\_team\_destroy}. - -Every team must have a least one member. Any attempt to create a team over an empty set of \acp{PE} will result in no new team being created. - -A ``team handle'' is an opaque object with type \CTYPE{shmem\_team\_t} that is used -to reference a defined team. Team handles are created by one of the team split -routines and destroyed by the team destroy routine. Team handles have local -semantics only. That is, team handles should not be stored in shared variables -and used across other \acp{PE}. Doing so will result in undefined behavior. - -By default, \openshmem creates predefined teams that will be available -for use once the routine \FUNC{shmem\_init} has been called. See -Section~\ref{subsec:library_handles} for a description of all predefined team handles -provided by \openshmem. Predefined \CTYPE{shmem\_team\_t} handles can be used as -the parent team when creating new \openshmem teams. - -Every \ac{PE} is a member of the default team, which may be referenced -through the team handle \LibHandleRef{SHMEM\_TEAM\_WORLD}, -and its number in the default team is equal to the -value of its \ac{PE} number as returned by \FUNC{shmem\_my\_pe}. - -A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, may be used to -indicate that a returned team handle is not valid. This value can be tested -against to check for successful split operations and can be assigned to user -declared team handles as a sentinel value. 
- -Teams that are created by a \FUNC{shmem\_team\_split\_*} routine may be -provided a configuration argument that specifies options that may affect -a team's capabilities and may allow for optimized performance. -This configuration argument is of type \CTYPE{shmem\_team\_config\_t}, which -is detailed further in Section~\ref{subsec:shmem_team_config_t}. - -%% Teams may be created with options that change team behavior and may allow for -%% more optimized performance. These options are described in -%% Section~\ref{subsec:library_constants} and in the various descriptions of the -%% team split routines below. In particular, teams may be created with an option -%% to disable support for collective communications, which allows implementations -%% to reduce team creation overheads for those teams. In that case, the team is -%% just a local reindexing of some set of \acp{PE} that can be used for -%% point to point communications or as parent teams in subsequent split operations. +\input{content/shmem_teams_intro.tex} \subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} \input{content/shmem_team_my_pe.tex} From 16db9c4bb5ab3859708063d9bf56d23c1c53c76f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 26 Sep 2018 10:52:09 -0500 Subject: [PATCH 057/319] Add text for concurrent team creation requirements --- content/shmem_teams_intro.tex | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/content/shmem_teams_intro.tex b/content/shmem_teams_intro.tex index 23555e8a0..a92f0af83 100644 --- a/content/shmem_teams_intro.tex +++ b/content/shmem_teams_intro.tex @@ -44,6 +44,42 @@ This configuration argument is of type \CTYPE{shmem\_team\_config\_t}, which is detailed further in Section~\ref{subsec:shmem_team_config_t}. +Team creation is a collective operation. As such, team creation in a +multithreaded environment follows the same semantics as discussed in section +\ref{subsec:thread_support}. 
Like other collectives, team creation is an action +of the \ac{PE} as a whole, and it is up to the programmer to ensure that each +\ac{PE} has consistent and predictable ordering of team creation calls over +all of its threads. + +In addition to ordering requirements on team creation, there may also be +synchronization requirements. +\begin{itemize} +\item There is a special case where all new teams resulting from a split operation are +created with the \LibHandleRef{SHMEM\_TEAM\_NOCOLLECTIVE} option and are configured +to create \CONST{0} new contexts for the team. This means the team will \emph{not} be used +directly for point-to-point or collective communication routines, but only for +\ac{PE} number translation and further split operations. When creating new teams +of this type, no synchronization will be required around team creation operations. +\item In the case where a parent team uses multiple team split calls to create +multiple new teams which do not meet the above criteria for avoiding synchronization, +\emph{and} there is an overlap in team membership in the resulting new teams, all \acp{PE} +in the parent team must perform a barrier between team creation calls. +In other words, undefined behavior will result from allowing simultaneous execution of team +creation collective operations that split some team $T_0$ into multiple new +teams, $T_1, T_2, \dots$, where some \ac{PE} $p$ exists such that $p \in T_1, p \in T_2, \dots$. +By executing a barrier among the set of +all \acp{PE} in team $T_0$ between team creation operations, simultaneous execution +of split operations by those \acp{PE} is prevented. \ac{PE} $p$ is thereby guaranteed to +complete each split operation on $T_0$ before the next split operation on $T_0$ commences.
+\item In the case where the above conditions hold to require a barrier on $T_0$, +but the parent team $T_0$ cannot be used for barriers due to having +been created with the \LibHandleRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, the program may use any +ancestor team, e.g. \LibHandleRef{SHMEM\_TEAM\_WORLD}, for synchronization around new +team creation. The program may alternatively use some other custom synchronization method +as long as it ensures that each split collective completes for all \acp{PE} +in team $T_0$ before the next split on $T_0$ commences. +\end{itemize} + %% Teams may be created with options that change team behavior and may allow for %% more optimized performance. These options are described in %% Section~\ref{subsec:library_constants} and in the various descriptions of the From 0c3da855bda805114477c3d3b27d798f87c21940 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 12 Oct 2018 11:45:44 -0500 Subject: [PATCH 058/319] Update reductions to add team based functions --- content/shmem_reductions.tex | 349 ++++++++++++++++++++++++++--------- 1 file changed, 258 insertions(+), 91 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 11ee26923..41744c9a9 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -5,32 +5,152 @@ \begin{apidefinition} +\begin{table}[h] + \begin{center} + \begin{tabular}{|l|l|l|l|l|} + \hline + \TYPE & \TYPENAME & \multicolumn{3}{c|}{Operations Supporting \TYPE}\\ \hline + short & short & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline + int & int & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline + long & long & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline + long long & longlong & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline + float & float & & MAX, MIN & SUM, PROD \\ \hline + double & double & & MAX, MIN & SUM, PROD \\ \hline + long double & longdouble & & MAX, MIN & SUM, PROD \\ \hline + double \_Complex & complexd & & & SUM, PROD \\ \hline + float \_Complex & 
complexf & & & SUM, PROD \\ \hline + \end{tabular} + \TableCaptionRef{Reduction Types, Names and Supporting Operations} + \label{reducetypes} + \end{center} +\end{table} + \paragraph{AND} Performs a bitwise AND reduction across a set of \acp{PE}.\newline + +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{C11synopsis} +where \TYPE{} is one of the integer types supported for the AND operation as specified by Table \ref{reducetypes}. +} + +%% C/C++ \begin{Csynopsis} -void @\FuncDecl{shmem\_short\_and\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -void @\FuncDecl{shmem\_int\_and\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void @\FuncDecl{shmem\_long\_and\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void @\FuncDecl{shmem\_longlong\_and\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} - +{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, TYPE *pWrk, long *pSync); +\end{CsynopsisCol} +\end{DeprecateBlock} +\newtext{where \TYPE{} is one of the integer types supported for the AND operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} + +%% Fortran \begin{Fsynopsis} CALL @\FuncDecl{SHMEM\_INT4\_AND\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride,
PE_size, pWrk, pSync) CALL @\FuncDecl{SHMEM\_INT8\_AND\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} +%% + +\paragraph{OR} +Performs a bitwise OR reduction across a set of \acp{PE}.\newline + +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{C11synopsis} +where \TYPE{} is one of the integer types supported for the OR operation as specified by Table \ref{reducetypes}. +} + +%% C/C++ +\begin{Csynopsis} +\end{Csynopsis} +{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, TYPE *pWrk, long *pSync); +\end{CsynopsisCol} +\end{DeprecateBlock} +\newtext{where \TYPE{} is one of the integer types supported for the OR operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} + +%% Fortran +\begin{Fsynopsis} +CALL @\FuncDecl{SHMEM\_INT4\_OR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_OR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +\end{Fsynopsis} +%% + +\paragraph{XOR} +Performs a bitwise exclusive OR (XOR) reduction across a set of \acp{PE}.\newline + +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{C11synopsis} +where \TYPE{} is one of the integer types supported for the XOR operation as specified by Table \ref{reducetypes}.
+} + +%% C/C++ +\begin{Csynopsis} +\end{Csynopsis} +{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, TYPE *pWrk, long *pSync); +\end{CsynopsisCol} +\end{DeprecateBlock} +\newtext{where \TYPE{} is one of the integer types supported for the XOR operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} + +%% Fortran +\begin{Fsynopsis} +CALL @\FuncDecl{SHMEM\_INT4\_XOR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_XOR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +\end{Fsynopsis} +%% \paragraph{MAX} Performs a maximum-value reduction across a set of \acp{PE}.\newline + +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{C11synopsis} +where \TYPE{} is one of the integer or real types supported for the MAX operation as specified by Table \ref{reducetypes}.
+} + +%% C/C++ \begin{Csynopsis} -void @\FuncDecl{shmem\_short\_max\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -void @\FuncDecl{shmem\_int\_max\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void @\FuncDecl{shmem\_double\_max\_to\_all}@(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void @\FuncDecl{shmem\_float\_max\_to\_all}@(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void @\FuncDecl{shmem\_long\_max\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void @\FuncDecl{shmem\_longdouble\_max\_to\_all}@(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void @\FuncDecl{shmem\_longlong\_max\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} - +{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, TYPE *pWrk, long *pSync); +\end{CsynopsisCol} +\end{DeprecateBlock} +\newtext{where \TYPE{} is one of the integer or real types supported for the MAX operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} + +%% Fortran \begin{Fsynopsis} CALL @\FuncDecl{SHMEM\_INT4\_MAX\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size,
pWrk, pSync) CALL @\FuncDecl{SHMEM\_INT8\_MAX\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) @@ -41,16 +161,31 @@ \paragraph{MIN} Performs a minimum-value reduction across a set of \acp{PE}.\newline + +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{C11synopsis} +where \TYPE{} is one of the integer or real types supported for the MIN operation as specified by Table \ref{reducetypes}. +} + +%% C/C++ \begin{Csynopsis} -void @\FuncDecl{shmem\_short\_min\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -void @\FuncDecl{shmem\_int\_min\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void @\FuncDecl{shmem\_double\_min\_to\_all}@(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void @\FuncDecl{shmem\_float\_min\_to\_all}@(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void @\FuncDecl{shmem\_long\_min\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void @\FuncDecl{shmem\_longdouble\_min\_to\_all}@(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void @\FuncDecl{shmem\_longlong\_min\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} - +{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} 
+\begin{CsynopsisCol} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, TYPE *pWrk, long *pSync); +\end{CsynopsisCol} +\end{DeprecateBlock} +\newtext{where \TYPE{} is one of the integer or real types supported for the MIN operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} + +%% Fortran \begin{Fsynopsis} CALL @\FuncDecl{SHMEM\_INT4\_MIN\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) CALL @\FuncDecl{SHMEM\_INT8\_MIN\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) @@ -61,18 +196,31 @@ \paragraph{SUM} Performs a sum reduction across a set of \acp{PE}.\newline + +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{C11synopsis} +where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation as specified by Table \ref{reducetypes}.
+} + +%% C/C++ \begin{Csynopsis} -void @\FuncDecl{shmem\_complexd\_sum\_to\_all}@(double _Complex *dest, const double _Complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double _Complex *pWrk, long *pSync); -void @\FuncDecl{shmem\_complexf\_sum\_to\_all}@(float _Complex *dest, const float _Complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float _Complex *pWrk, long *pSync); -void @\FuncDecl{shmem\_short\_sum\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -void @\FuncDecl{shmem\_int\_sum\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void @\FuncDecl{shmem\_double\_sum\_to\_all}@(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void @\FuncDecl{shmem\_float\_sum\_to\_all}@(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void @\FuncDecl{shmem\_long\_sum\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride,int PE_size, long *pWrk, long *pSync); -void @\FuncDecl{shmem\_longdouble\_sum\_to\_all}@(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void @\FuncDecl{shmem\_longlong\_sum\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} - +{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int 
logPE_stride, int PE_size, TYPE *pWrk, long *pSync); +\end{CsynopsisCol} +\end{DeprecateBlock} +\newtext{where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} + +%% Fortran \begin{Fsynopsis} CALL @\FuncDecl{SHMEM\_COMP4\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) CALL @\FuncDecl{SHMEM\_COMP8\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) @@ -85,18 +233,31 @@ \paragraph{PROD} Performs a product reduction across a set of \acp{PE}.\newline + +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{C11synopsis} +where \TYPE{} is one of the integer, real, or complex types supported for the PROD operation as specified by Table \ref{reducetypes}. +} + +%% C/C++ \begin{Csynopsis} -void @\FuncDecl{shmem\_complexd\_prod\_to\_all}@(double _Complex *dest, const double _Complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double _Complex *pWrk, long *pSync); -void @\FuncDecl{shmem\_complexf\_prod\_to\_all}@(float _Complex *dest, const float _Complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float _Complex *pWrk, long *pSync); -void @\FuncDecl{shmem\_short\_prod\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -void @\FuncDecl{shmem\_int\_prod\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void @\FuncDecl{shmem\_double\_prod\_to\_all}@(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void @\FuncDecl{shmem\_float\_prod\_to\_all}@(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int 
PE_size, float *pWrk, long *pSync); -void @\FuncDecl{shmem\_long\_prod\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void @\FuncDecl{shmem\_longdouble\_prod\_to\_all}@(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void @\FuncDecl{shmem\_longlong\_prod\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} - +{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, TYPE *pWrk, long *pSync); +\end{CsynopsisCol} +\end{DeprecateBlock} +\newtext{where \TYPE{} is one of the integer, real, or complex types supported for the PROD operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} + +%% Fortran \begin{Fsynopsis} CALL @\FuncDecl{SHMEM\_COMP4\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) CALL @\FuncDecl{SHMEM\_COMP8\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) @@ -107,33 +268,7 @@ CALL @\FuncDecl{SHMEM\_REAL16\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} -\paragraph{OR} -Performs a bitwise OR reduction across a set of \acp{PE}.\newline -\begin{Csynopsis} -void @\FuncDecl{shmem\_short\_or\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -void @\FuncDecl{shmem\_int\_or\_to\_all}@(int *dest, const int *source, int nreduce, int 
PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void @\FuncDecl{shmem\_long\_or\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void @\FuncDecl{shmem\_longlong\_or\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -\end{Csynopsis} -\begin{Fsynopsis} -CALL @\FuncDecl{SHMEM\_INT4\_OR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL @\FuncDecl{SHMEM\_INT8\_OR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -\end{Fsynopsis} - -\paragraph{XOR} -Performs a bitwise exclusive OR (XOR) reduction across a set of \acp{PE}.\newline -\begin{Csynopsis} -void @\FuncDecl{shmem\_short\_xor\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -void @\FuncDecl{shmem\_int\_xor\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void @\FuncDecl{shmem\_long\_xor\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void @\FuncDecl{shmem\_longlong\_xor\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -\end{Csynopsis} - -\begin{Fsynopsis} -CALL @\FuncDecl{SHMEM\_INT4\_XOR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL @\FuncDecl{SHMEM\_INT8\_XOR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -\end{Fsynopsis} \begin{apiarguments} @@ -147,6 +282,12 @@ \apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} arrays. \VAR{nreduce} must be of type integer. 
When using \Fortran, it must be a default integer value.} + +\newtext{% +\apiargument{IN}{team}{The team over which to perform the operation.}% +} + +\begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} @@ -167,48 +308,74 @@ Every element of this array must be initialized with the value \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set enter the reduction routine.} +\end{DeprecateBlock} \end{apiarguments} \apidescription{ - \openshmem reduction routines compute one or more reductions across symmetric + \openshmem reduction routines \newtext{are collective routines over an active set or + existing \openshmem team that} compute one or more reductions across symmetric arrays on multiple \acp{PE}. A reduction performs an associative binary routine across a set of values. The \VAR{nreduce} argument determines the number of separate reductions to - perform. The \source{} array on all \acp{PE} in the active set provides one - element for each reduction. The results of the reductions are placed in the - \dest{} array on all \acp{PE} in the active set. The active set is defined - by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. - + perform. The \source{} array on all \acp{PE} \newtext{participating in the reduction} + \oldtext{in the active set} %% + provides one element for each reduction. The results of the reductions are placed in the + \dest{} array on all \acp{PE} \newtext{participating in the reduction.} + \oldtext{in the active set.} %% + The \source{} and \dest{} arrays may be the same array, but they may not be - overlapping arrays. - - As with all \openshmem collective routines, each of these routines assumes + overlapping arrays. 
The same \dest{} and \source{} arrays + must be passed to all \acp{PE} \newtext{participating in the reduction.} + \oldtext{in the active set.} %% + + {\color{Green} + Team-based reduction routines operate over all \acp{PE} in the provided team argument. All + \acp{PE} in the provided team must participate in the reduction. + + If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, + it will not have the required support structures to complete this routine. If + such a team is passed to this or any other team collective routine, the behavior + is undefined. + } + + \newtext{Active-set-based reduction routines operate over all \acp{PE} in} the active set + \oldtext{is} %% + defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. + + As with all \newtext{active-set-based} + \oldtext{\openshmem} %% + collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in - the active set calls an \openshmem collective routine, the behavior is undefined. + the active set calls an \newtext{active-set-based} + \oldtext{\openshmem} %% + collective routine, the behavior is undefined. - The values of arguments \VAR{nreduce}, \VAR{PE\_start}, \VAR{logPE\_stride}, and - \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same \dest{} - and \source{} arrays, and the same \VAR{pWrk} and \VAR{pSync} work arrays, must - be passed to all \acp{PE} in the active set. + The values of arguments \VAR{nreduce}, \VAR{PE\_start}, \VAR{logPE\_stride}, + and \VAR{PE\_size} must be equal on all \acp{PE} in the active set. + The same \VAR{pWrk} and \VAR{pSync} work arrays must be passed to all + \acp{PE} in the active set. 
- Before any \ac{PE} calls a reduction routine, - the following conditions must be ensured: + Before any \ac{PE} calls a reduction routine, the following conditions must be ensured: \begin{itemize} - \item The \VAR{pWrk} and \VAR{pSync} arrays on all \acp{PE} in the + \item The \dest{} array on all \acp{PE} \newtext{participating in the reduction} + \oldtext{in the active set} %% + is ready to accept the results of the \OPR{reduction}. + \item \newtext{If using active-set-based routines,} the + \VAR{pWrk} and \VAR{pSync} arrays on all \acp{PE} in the active set are not still in use from a prior call to a collective \openshmem routine. - \item The \dest{} array on all \acp{PE} in the active set is ready - to accept the results of the \OPR{reduction}. \end{itemize} Otherwise, the behavior is undefined. - + Upon return from a reduction routine, the following are true for the local - \ac{PE}: The \dest{} array is updated and the \source{} array may be safely reused. - The values in the \VAR{pSync} array are - restored to the original values. - + \ac{PE}: + \begin{itemize} + \item The \dest{} array is updated and the \source{} array may be safely reused. + \item \newtext{If using active-set-based routines,} + the values in the \VAR{pSync} array are restored to the original values. + \end{itemize} The complex-typed interfaces are only provided for sum and product reductions. 
When the \Cstd translation environment does not support complex types From 6ebb151576e18b6aac18abae9ce98fffe657a8bd Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 12 Oct 2018 12:48:22 -0500 Subject: [PATCH 059/319] Merge team and set broadcast; deprecate set broadcast --- content/shmem_broadcast.tex | 108 ++++++++++++++++++++++++------- content/shmem_team_broadcast.tex | 80 ----------------------- main_spec.tex | 3 - 3 files changed, 83 insertions(+), 108 deletions(-) delete mode 100644 content/shmem_team_broadcast.tex diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index ac8f66751..4603ec7b9 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -5,10 +5,29 @@ \begin{apidefinition} +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +void @\FuncDecl{shmem\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +\end{C11synopsis} +} + +%% C/C++ \begin{Csynopsis} +\end{Csynopsis} +{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +void @\FuncDecl{shmem\_team\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} +\begin{CsynopsisCol} void @\FuncDecl{shmem\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); void @\FuncDecl{shmem\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); -\end{Csynopsis} +\end{CsynopsisCol} +\end{DeprecateBlock} \begin{Fsynopsis} INTEGER nelems, PE_root, PE_start, logPE_stride, PE_size @@ -21,7 +40,8 @@ \begin{apiarguments} -\apiargument{OUT}{dest}{A symmetric data object.} 
+\apiargument{OUT}{dest}{A symmetric data object. \newtext{See the table below in this description + for allowable types.}} \apiargument{IN}{source}{A symmetric data object that can be of any data type that is permissible for the \dest{} argument.} \apiargument{IN}{nelems}{The number of elements in \source. For @@ -29,8 +49,15 @@ 32-bit halfwords. nelems must be of type \VAR{size\_t} in \Cstd. When using \Fortran, it must be a default integer value.} \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the active set, from which the data is copied. Must be greater than or equal to - 0 and less than \VAR{PE\_size}. \VAR{PE\_root} must be of type integer. When using \Fortran, it must be a default integer value.} + the \newtext{team or} active set, from which the data is copied. + \VAR{PE\_root} must be of type \CTYPE{int}. + When using \Fortran, it must be a default integer value.} + +\newtext{% +\apiargument{IN}{team}{The team over which to perform the operation.}% +} + +\begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} @@ -47,32 +74,63 @@ Every element of this array must be initialized with the value \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set enters \FUNC{shmem\_broadcast}.} +\end{DeprecateBlock} \end{apiarguments} \apidescription{ - \openshmem broadcast routines are collective routines. They copy data object + \openshmem broadcast routines are collective routines \newtext{over an active set or + existing \openshmem team}. They copy data object \source{} on the processor specified by \VAR{PE\_root} and store the values at - \dest{} on the other \acp{PE} specified by the triplet \VAR{PE\_start}, - \VAR{logPE\_stride}, \VAR{PE\_size}. The data is not copied to the \dest{} area - on the root \ac{PE}. 
- - As with all \openshmem collective routines, each of these routines assumes that + \dest{} on the other \acp{PE} \newtext{participating in the collective.} + \oldtext{specified by the triplet \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size}.} %% + The data is not copied to the \dest{} area on the root \ac{PE}. + + {\color{Green} + The same \dest{} and \source{} data objects and the same value of \VAR{PE\_root} must be + passed by all \acp{PE} participating in the collective. + + Team-based broadcast routines operate over all \acp{PE} in the provided team argument. All + \acp{PE} in the provided team must participate in the broadcast. + + If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, + it will not have the required support structures to complete this routine. If + such a team is passed to this or any other team collective routine, the behavior + is undefined. + + As with all team-based \openshmem routines, \ac{PE} + numbering is relative to the team. The specified root \ac{PE} must be a valid \ac{PE} + number for the team, between \CONST{0} and \VAR{N-1}, where \VAR{N} is + the size of the team. + + Error checking will be done to detect a value of \LibConstRef{SHMEM\_TEAM\_NULL} passed + for the team argument. In that case, the program will abort with an informative + error message. If an invalid team handle is passed to the routine, + the behavior is undefined. + + Active-set-based broadcast routines operate over all \acp{PE} in the active set + defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. + } + + As with all \newtext{active-set-based} \openshmem collective routines, + each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the active set calls an \openshmem collective routine, the behavior is undefined. 
The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride}, - and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. The same - \dest{} and \source{} data objects and the same \VAR{pSync} work array must be - passed by all \acp{PE} in the active set. - - Before any \ac{PE} calls a broadcast routine, - the following conditions must be ensured: + and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. + \newtext{The value of \VAR{PE\_root} must be greater than or equal to \CONST{0} and less than \VAR{PE\_size}.} + The same \VAR{pSync} work array must be passed by all \acp{PE} in the active set. + + Before any \ac{PE} calls a broadcast routine, the following conditions must be ensured: \begin{itemize} - \item The \VAR{pSync} array on all \acp{PE} in the active set is - not still in use from a prior call to a broadcast routine. - \item The \dest{} array on all \acp{PE} in the active set is ready - to accept the broadcast data. + \item The \dest{} array on all \acp{PE} \newtext{participating in the broadcast} + \oldtext{in the active set} %% + is ready to accept the broadcast data. + \item \newtext{If using active-set-based routines,} the + \VAR{pSync} array on all \acp{PE} in the + active set is not still in use from a prior call to a collective + \openshmem routine. \end{itemize} Otherwise, the behavior is undefined. @@ -82,8 +140,8 @@ \item If the current \ac{PE} is not the root \ac{PE}, the \dest{} data object is updated. \item The \source{} data object may be safely reused. - \item The values in the \VAR{pSync} array are restored to the - original values. + \item \newtext{If using active-set-based routines,} + the values in the \VAR{pSync} array are restored to the original values. \end{itemize} } @@ -93,11 +151,11 @@ }{Routine}{Data type of \VAR{dest} and \VAR{source}} \apitablerow{shmem\_broadcast8, shmem\_broadcast64}{Any noncharacter - type that has an element size of \CONST{64} bits. 
No \Fortran derived types or + type that has an element size of \CONST{64} bits. No \Fortran derived types \newtext{nor} \oldtext{or} \CorCpp{} structures are allowed.} \apitablerow{shmem\_broadcast4, shmem\_broadcast32}{Any noncharacter - type that has an element size of \CONST{32} bits. No \Fortran - derived types or \CorCpp{} structures are allowed.} + type that has an element size of \CONST{32} bits. No \Fortran derived types \newtext{nor} \oldtext{or} + \CorCpp{} structures are allowed.} \apireturnvalues{ None. diff --git a/content/shmem_team_broadcast.tex b/content/shmem_team_broadcast.tex deleted file mode 100644 index 3417aee6b..000000000 --- a/content/shmem_team_broadcast.tex +++ /dev/null @@ -1,80 +0,0 @@ -\apisummary{ - Broadcasts a block of data from one \ac{PE} in a team to all other \acp{PE} in the team -} - -\begin{apidefinition} - -\begin{Csynopsis} -void @\FuncDecl{shmem\_team\_broadcast32}@(shmem_team_t team, void *dest, const void *source, size_t nelems, int PE_root); -void @\FuncDecl{shmem\_team\_broadcast64}@(shmem_team_t team, void *dest, const void *source, size_t nelems, int PE_root); -\end{Csynopsis} - -\begin{apiarguments} - -\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been created without - disabling support for collective operations.} -\apiargument{OUT}{dest}{A symmetric data object. See the table below in this description - for allowable types} -\apiargument{IN}{source}{A symmetric data object that can be of any data type - that is permissible for the \dest{} argument.} -\apiargument{IN}{nelems}{The number of elements in \source{}. For - \FUNC{shmem\_team\_broadcast32}, this is the number of - 32-bit halfwords. nelems must be of type \CTYPE{size\_t}.} -\apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the team, from which the data is copied. 
\VAR{PE\_root} must be of type \CTYPE{int}.} - -\end{apiarguments} - -\apidescription{ - \openshmem team broadcast routines are collective routines over an existing team. - They copy data object \source{} on the processor specified by \VAR{PE\_root} - and store the values at \dest{} on the other \acp{PE} that are members of the - team. The data is not copied to the \dest{} area on the root \ac{PE}. - - If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, - it will not have the required support structures to complete this routine. If - such a team is passed to this or any other team collective routine, the behavior - is undefined. - - As with all \openshmem routines where the operation occurs over a given team, \ac{PE} - numbering is relative to the team. The specified root \ac{PE} must be a valid \ac{PE} - number for the team, between \CONST{0} and \VAR{N-1}, where \VAR{N} is - the size of the team. - - The values of the argument \VAR{PE\_root} must be the same value on all \acp{PE} in - the team. The same \dest{} and \source{} data objects must be passed by all \acp{PE} - in the team. - - Upon return from a broadcast routine, the following are true for the local \ac{PE}: - \begin{itemize} - \item If the current \ac{PE} is not the root \ac{PE}, - the \dest{} data object is updated. - \item The \source{} data object may be safely reused. - \end{itemize} - - Error checking will be done to detect a value of \LibConstRef{SHMEM\_TEAM\_NULL} passed - for the team argument. In that case, the program will abort with an informative - error message. If an invalid team handle is passed to the routine, - the behavior is undefined. -} - -\apidesctable{ -The \dest{} and \source{} data objects must conform to certain typing -constraints, which are as follows: -}{Routine}{Data type of \VAR{dest} and \VAR{source}} - -\apitablerow{shmem\_team\_broadcast64}{Any noncharacter - type that has an element size of \CONST{64} bits. 
- \CorCpp{} structures are NOT allowed.} -\apitablerow{shmem\_team\_broadcast32}{Any noncharacter - type that has an element size of \CONST{32} bits. - \CorCpp{} structures are NOT allowed.} - -\apireturnvalues{ - None. -} - -\apinotes{ -} - -\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index d03a3ff5f..96791d506 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -329,9 +329,6 @@ \subsubsection{\textbf{SHMEM\_SYNC\_ALL}}\label{subsec:shmem_sync_all} \subsubsection{\textbf{SHMEM\_SYNC}}\label{subsec:shmem_sync} \input{content/shmem_sync.tex} -\subsubsection{\textbf{SHMEM\_TEAM\_BROADCAST}}\label{subsec:shmem_team_broadcast} -\input{content/shmem_team_broadcast.tex} - \subsubsection{\textbf{SHMEM\_BROADCAST}}\label{subsec:shmem_broadcast} \input{content/shmem_broadcast.tex} From 981413ed31364f15b7a4528f0f25f2baff366fb2 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 15 Oct 2018 09:24:24 -0500 Subject: [PATCH 060/319] Merge team and set collect; deprecate set collect --- content/shmem_collect.tex | 104 +++++++++++++++++++++++++-------- content/shmem_team_collect.tex | 89 ---------------------------- main_spec.tex | 3 - 3 files changed, 80 insertions(+), 116 deletions(-) delete mode 100644 content/shmem_team_collect.tex diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 2ca75d491..3ae5a106d 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -5,12 +5,34 @@ \begin{apidefinition} +%% C11 +{\color{Green} +\begin{C11synopsis} +void @\FuncDecl{shmem\_collect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +void @\FuncDecl{shmem\_collect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +void @\FuncDecl{shmem\_fcollect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +void @\FuncDecl{shmem\_fcollect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +\end{C11synopsis} +} + \begin{Csynopsis} +\end{Csynopsis} 
+{\color{Green} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_team\_collect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +void @\FuncDecl{shmem\_team\_collect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +void @\FuncDecl{shmem\_team\_fcollect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +void @\FuncDecl{shmem\_team\_fcollect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +\end{CsynopsisCol} +} +\begin{DeprecateBlock} +\begin{CsynopsisCol} void @\FuncDecl{shmem\_collect32}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); void @\FuncDecl{shmem\_collect64}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); void @\FuncDecl{shmem\_fcollect32}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); void @\FuncDecl{shmem\_fcollect64}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -\end{Csynopsis} +\end{CsynopsisCol} +\end{DeprecateBlock} \begin{Fsynopsis} INTEGER nelems @@ -28,20 +50,20 @@ \begin{apiarguments} -\apiargument{OUT}{dest}{A symmetric array. The \dest{} argument must be large enough - to accept the concatenation of the \source{} arrays on all participating \acp{PE}. The data - types are as follows: For \FUNC{shmem\_collect8}, \FUNC{shmem\_collect64}, - \FUNC{shmem\_fcollect8}, and \FUNC{shmem\_fcollect64}, any data type with an - element size of 64 bits. \Fortran derived types, \Fortran character type, - and \CorCpp{} structures are not permitted. For \FUNC{shmem\_collect4}, - \FUNC{shmem\_collect32}, \FUNC{shmem\_fcollect4}, and \FUNC{shmem\_fcollect32}, - any data type with an element size of \CONST{32} bits. 
\Fortran derived - types, \Fortran character type, and \CorCpp{} structures are not permitted.} +\apiargument{OUT}{dest}{A symmetric array large enough + to accept the concatenation of the \source{} arrays on all participating \acp{PE}. + \newtext{See table below in this description for allowable data types.}} \apiargument{IN}{source}{A symmetric data object that can be of any type permissible for the \dest{} argument.} \apiargument{IN}{nelems}{The number of elements in the \source{} array. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using \Fortran, it must be a default integer value.} + +\newtext{% +\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been + created without disabling support for collective operations.} +} + \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} @@ -64,30 +86,64 @@ \apidescription{ \openshmem \FUNC{collect} and \FUNC{fcollect} routines concatenate \VAR{nelems} \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the - \dest{} array, over the set of \acp{PE} defined by \VAR{PE\_start}, - \VAR{log2PE\_stride}, and \VAR{PE\_size}, in processor number order. The - resultant \dest{} array contains the contribution from \ac{PE} \VAR{PE\_start} - first, then the contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} - second, and so on. The collected result is written to the \dest{} array for all - \acp{PE} in the active set. + \dest{} array, over \newtext{an \openshmem team or active set} + \oldtext{the set of \acp{PE} defined by \VAR{PE\_start}, \VAR{log2PE\_stride}, and \VAR{PE\_size},} %% + in processor number order. 
The resultant \dest{} array contains the contribution from + {\color{Green} \acp{PE} as follows: + + \begin{itemize} + \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the + contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the + contribution from \ac{PE} \CONST{1} in the team, and so on. + \end{itemize} + + The collected result is written to the \dest{} array for all \acp{PE} + that participate in the collective. The same \dest{} and \source{} + arrays must be passed by all \acp{PE} that participate in the collective. } The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to vary from \ac{PE} to \ac{PE}. - - As with all \openshmem collective routines, each of these routines assumes that + + {\color{Green} + Team-based collect routines operate over all \acp{PE} in the provided team argument. All + \acp{PE} in the provided team must participate in the collective. If a team created without + support for collectives is passed to this or any other team collective routine, the + behavior is undefined. + + Active-set-based collect routines operate over all \acp{PE} in the active set + defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. + As with all \newtext{active-set-based} collective routines, + each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the - active set and calls this collective routine, the behavior is undefined. + active set calls this collective routine, the behavior is undefined. } The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} - must be the same value on all \acp{PE} in the active set. 
The same \dest{} and \source{} - arrays and the same \VAR{pSync} work array must be passed by all \acp{PE} in the - active set. + must be the same value on all \acp{PE} in the active set. The same + \oldtext{\dest{} and \source{} arrays and the same} %% + \VAR{pSync} work array must be passed by all \acp{PE} in the active set. Upon return from a collective routine, the following are true for the local - \ac{PE}: The \dest{} array is updated and the \source{} array may be safely reused. - The values in the \VAR{pSync} array are + \ac{PE}: + \begin{itemize} + \item The \dest{} array is updated and the \source{} array may be safely reused. + \item \newtext{For active-set-based collectives,} the values in the \VAR{pSync} array are restored to the original values. + \end{itemize} +} + +{\color{Green} +\apidesctable{ +The \dest{} and \source{} data objects must conform to certain typing +constraints, which are as follows: +}{Routine}{Data type of \VAR{dest} and \VAR{source}} +\apitablerow{\FUNC{shmem\_collect8}, \FUNC{shmem\_collect64}, \FUNC{shmem\_fcollect8}, \FUNC{shmem\_fcollect64}}% + {Any noncharacter type that has an element size of \CONST{64} bits. No \Fortran derived types nor + \CorCpp{} structures are allowed.} +\apitablerow{\FUNC{shmem\_collect4}, \FUNC{shmem\_collect32}, \FUNC{shmem\_fcollect4}, \FUNC{shmem\_fcollect32}}% + {Any noncharacter type that has an element size of \CONST{32} bits. No \Fortran derived types nor + \CorCpp{} structures are allowed.} } \apireturnvalues{ diff --git a/content/shmem_team_collect.tex b/content/shmem_team_collect.tex deleted file mode 100644 index 7d1ab0eb0..000000000 --- a/content/shmem_team_collect.tex +++ /dev/null @@ -1,89 +0,0 @@ -\apisummary{ - Concatenates blocks of data from multiple \acp{PE} in a team to an array in every - \ac{PE} in the team. 
-} - -\begin{apidefinition} - -\begin{Csynopsis} -void @\FuncDecl{shmem\_team\_collect32}@(shmem_team_t team, void *dest, const void *source, size_t nelems); -void @\FuncDecl{shmem\_team\_collect64}@(shmem_team_t team, void *dest, const void *source, size_t nelems); -void @\FuncDecl{shmem\_team\_fcollect32}@(shmem_team_t team, void *dest, const void *source, size_t nelems); -void @\FuncDecl{shmem\_team\_fcollect64}@(shmem_team_t team, void *dest, const void *source, size_t nelems); -\end{Csynopsis} - -\begin{apiarguments} - -\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been - created without disabling support for collective operations.} -\apiargument{OUT}{dest}{A symmetric array large enough - to accept the concatenation of the \source{} arrays on all \acp{PE} in the team. - See table below in this description for allowable data types.} -\apiargument{IN}{source}{A symmetric data object that can be of any type permissible - for the \dest{} argument.} -\apiargument{IN}{nelems}{The number of elements in the \source{} array. \VAR{nelems} - must be of type \VAR{size\_t}.} - -\end{apiarguments} - -\apidescription{ - \openshmem \FUNC{team\_collect} and \FUNC{team\_fcollect} are collective routines - over an existing team. These routines concatenate \VAR{nelems} - \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the - \dest{} array, over all \acp{PE} in the specified \VAR{team} in processor number order. - The resultant \dest{} array contains the contribution from the first \ac{PE} in the \VAR{team} - , then the contribution from the second \ac{PE} in the \VAR{team}, and so on. - The collected result is written to the \dest{} array for all \acp{PE} in the team. - - The \FUNC{fcollect} routines require that all \acp{PE} in the team provide the same - value for \VAR{nelems}, while the \FUNC{collect} routines allow \VAR{nelems} to - vary from \ac{PE} to \ac{PE}. 
- - If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, - it will not have the required support structures to complete this routine. If - such a team is passed to this or any other team collective routine, the behavior - is undefined. - - The same \dest{} and \source{} data objects must be passed by all \acp{PE} - in the team. - - Upon return from a collective routine, the following are true for the local - \ac{PE}: - \begin{itemize} - \item The \dest{} array is updated. - \item The \source{} array may be safely reused. - \end{itemize} - - Error checking will be done to detect a value of \LibConstRef{SHMEM\_TEAM\_NULL} passed - for the team argument. In that case, the program will abort with an informative - error message. If an invalid team handle is passed to the routine, - the behavior is undefined. -} - -\apidesctable{ -The \dest{} and \source{} data objects must conform to certain typing -constraints, which are as follows: -}{Routine}{Data type of \VAR{dest} and \VAR{source}} - -\apitablerow{shmem\_team\_collect64, shmem\_team\_fcollect64}{Any noncharacter - type that has an element size of \CONST{64} bits. - \CorCpp{} structures are NOT allowed.} -\apitablerow{shmem\_team\_collect32, shmem\_team\_fcollect32}{Any noncharacter - type that has an element size of \CONST{32} bits. - \CorCpp{} structures are NOT allowed.} - -\apireturnvalues{ - None. -} - -\apinotes{ - All \openshmem team collective routines use symmetric data structures associated - with the team to synchronize and share data. By default, new teams that result from - split operations will have these structures. 
-} - -\begin{apiexamples} - -\end{apiexamples} - -\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index d03a3ff5f..ea8d28f3b 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -338,9 +338,6 @@ \subsubsection{\textbf{SHMEM\_BROADCAST}}\label{subsec:shmem_broadcast} \subsubsection{\textbf{SHMEM\_COLLECT, SHMEM\_FCOLLECT}}\label{subsec:shmem_collect} \input{content/shmem_collect.tex} -\subsubsection{\textbf{SHMEM\_TEAM\_COLLECT, SHMEM\_TEAM\_FCOLLECT}}\label{subsec:shmem_team_collect} -\input{content/shmem_team_collect.tex} - \subsubsection{\textbf{SHMEM\_REDUCTIONS}}\label{subsec:shmem_reductions} \input{content/shmem_reductions.tex} From 1ed6e94d66d80c91cbcf45bb521b38b06ee0a281 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 17 Oct 2018 15:03:29 -0500 Subject: [PATCH 061/319] rename team config num_threads to num_contexts; add default config values --- content/library_constants.tex | 4 +-- content/shmem_team_config_t.tex | 54 +++++++++++++++++++++++++--- content/shmem_team_split_2d.tex | 26 +++----------- content/shmem_team_split_strided.tex | 25 +++---------- 4 files changed, 60 insertions(+), 49 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index 0c2d4bfba..f68ea2c51 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -60,10 +60,10 @@ \tabularnewline \hline %% \color{Green} -\LibConstDecl{SHMEM\_TEAM\_NUM\_THREADS} & +\LibConstDecl{SHMEM\_TEAM\_NUM\_CONTEXTS} & \color{Green} The bitwise flag which specifies that a team creation routine should use the -\VAR{num\_threads} member of the provided +\VAR{num\_contexts} member of the provided \CTYPE{shmem\_team\_config\_t} configuration parameter. See Sections~\ref{subsec:shmem_team_config_t} and \ref{subsec:shmem_team_split_strided} for more detail about its use. 
diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex index cd309a5ed..440512bbc 100644 --- a/content/shmem_team_config_t.tex +++ b/content/shmem_team_config_t.tex @@ -8,7 +8,7 @@ typedef struct { int disable_collectives; int return_local_limit; - int num_threads; + int num_contexts; } shmem_team_config_t; \end{Csynopsis} @@ -40,11 +40,57 @@ When its value is zero, the most restrictive parameters are returned; otherwise, the locally restrictive parameters are returned. - The \VAR{num\_threads} member specifies the number of threads that will - create contexts from the new team. - It must have a nonnegative value. + The \VAR{num\_contexts} member specifies the total number of contexts + created from this team that can simultaneously exist. These contexts + may be created in any number of threads. A program + may destroy any number of contexts made from this team and make + any number of new ones so long as the total existing at any point + does not exceed \VAR{num\_contexts}. See Section~\ref{sec:ctx} for more on communication contexts and Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. + + When using the configuration structure to create teams, a mask parameter + controls which fields to use and which to ignore. So, a program does + not have to set all fields in the config struct; only those for which + it does not want the default values. + + A configuration mask value is created by combining individual field + masks through a bitwise OR operation of the following library constants: + + { + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ + The team should be created using the value of the + \VAR{disable\_collectives} member of the configuration parameter + \VAR{config}.
+ } + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_CONTEXTS}}{ + The team should be created using the value of the + \VAR{num\_contexts} member of the configuration parameter \VAR{config}. + } + } + + A configuration mask value of \CONST{0} indicates that the team + should be created with the default values for all configuration + parameters, as follows: + + { + \apitablerow{disable\_collectives = \CONST{0}}{ + By default, teams support collective operations + } + \apitablerow{return\_local\_limit = \CONST{0}}{ + By default, when team creation fails, the configuration structure returns the most restrictive + parameter value across all \acp{PE} in the new team + } + \apitablerow{num\_contexts = \CONST{0}}{ + By default, no contexts can be created on a new team + } + } + } \apinotes{ diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index 639d044c3..246a24a7b 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -73,28 +73,10 @@ The \VAR{xaxis\_mask} and\VAR{xaxis\_mask} arguments are a bitwise masks representing the set of configuration parameters to use from \VAR{xaxis\_config} and \VAR{yaxis\_config}, respectively. -A mask value of \CONST{0} indicates that all the field members of the -configuration parameter argument should be used. -Individual field masks can be combined through a bitwise OR operation -of the following library constants: - -{ - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ - The team should be created using the value of the - \VAR{disable\_collectives} member of the respective - configuration parameter. - } - \apitablerow{\LibConstRef{SHMEM\_TEAM\_LOCAL\_LIMIT}}{ - The team should be created using the value of the - \VAR{return\_local\_limit} member of the respective - configuration parameter. - } - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_THREADS}}{ - The team should be created using the value of the - \VAR{num\_threads} member of the respective - configuration parameter. 
- } -} +A mask value of \CONST{0} indicates that the team +should be created with the default values for all configuration parameters. +See Section~\ref{subsec:shmem_team_config_t} for field mask names and +default configuration parameters. If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 928b5f0f1..142677e73 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -57,27 +57,10 @@ The \VAR{config\_mask} argument is a bitwise mask representing the set of configuration parameters to use from \VAR{config}. -A \VAR{config\_mask} value of \CONST{0} indicates that all the field members -of \VAR{config} should be used. -Individual field masks can be combined through a bitwise OR operation -of the following library constants: - -{ - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ - The team should be created using the value of the - \VAR{disable\_collectives} member of the configuration parameter - \VAR{config}. - } - \apitablerow{\LibConstRef{SHMEM\_TEAM\_LOCAL\_LIMIT}}{ - The team should be created using the value of the - \VAR{return\_local\_limit} member of the configuration parameter - \VAR{config}. - } - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_THREADS}}{ - The team should be created using the value of the - \VAR{num\_threads} member of the configuration parameter \VAR{config}. - } -} +A \VAR{config\_mask} value of \CONST{0} indicates that the team +should be created with the default values for all configuration parameters. +See Section~\ref{subsec:shmem_team_config_t} for field mask names and +default configuration parameters. If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. 
From 74794a738532f296b3f49179d85e12239f2e7393 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 25 Oct 2018 10:53:00 -0500 Subject: [PATCH 062/319] Added broadcast return code and error behavior --- content/shmem_broadcast.tex | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 4603ec7b9..57b23a18c 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -8,8 +8,8 @@ %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); -void @\FuncDecl{shmem\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +int @\FuncDecl{shmem\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +int @\FuncDecl{shmem\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); \end{C11synopsis} } @@ -18,8 +18,8 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); -void @\FuncDecl{shmem\_team\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +int @\FuncDecl{shmem\_team\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +int @\FuncDecl{shmem\_team\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -82,32 +82,25 @@ \openshmem broadcast routines are collective routines \newtext{over an active set or existing \openshmem team}. 
They copy data object \source{} on the processor specified by \VAR{PE\_root} and store the values at - \dest{} on the other \acp{PE} \newtext{particpating in the collective.} + \dest{} on the other \acp{PE} \newtext{participating in the collective operation.} \oldtext{specified by the triplet \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size}.} %% The data is not copied to the \dest{} area on the root \ac{PE}. {\color{Green} The same \dest{} and \source{} data objects and the same value of \VAR{PE\_root} must be - passed by all \acp{PE} particpating in the collective. + passed by all \acp{PE} participating in the collective operation. Team-based broadcast routines operate over all \acp{PE} in the provided team argument. All - \acp{PE} in the provided team must participate in the reduction. - - If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, - it will not have the required support structures to complete this routine. If - such a team is passed to this or any other team collective routine, the behavior - is undefined. + \acp{PE} in the provided team must participate in the reduction. If a team created without + support for collectives is passed to this or any other team collective routine, the + behavior is undefined. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} + is passed to this routine, the behavior is undefined. As with all team-based \openshmem routines, \ac{PE} numbering is relative to the team. The specified root \ac{PE} must be a valid \ac{PE} number for the team, between \CONST{0} and \VAR{N-1}, where \VAR{N} is the size of the team. - Error checking will be done to detect a value of \LibConstRef{SHMEM\_TEAM\_NULL} passed - for the team argument. In that case, the program will abort with an informative - error message. If an invalid team handle is passed to the routine, - the behavior is undefined.
- Active-set-based broadcast routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. } @@ -158,7 +151,7 @@ \CorCpp{} structures are allowed.} \apireturnvalues{ - None. + Integer error code with currently undefined behavior. } \apinotes{ @@ -177,6 +170,11 @@ active set are still processing a prior \openshmem broadcast routine call that used the same \VAR{pSync} array. In general, this can be ensured only by doing some type of synchronization. + + Team handle error checking and integer return codes are currently undefined. + Implementations may define these behaviors as needed, but programs should + ensure portability by doing their own checks for invalid team handles and for + \LibConstRef{SHMEM\_TEAM\_NULL}. } \begin{apiexamples} From 6d618dd862b87ed471567869daa5ea9b92115e9d Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 25 Oct 2018 10:57:07 -0500 Subject: [PATCH 063/319] Fix typo in broadcast description --- content/shmem_broadcast.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 57b23a18c..6f63ef9af 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -91,7 +91,7 @@ passed by all \acp{PE} participating in the collective operation. Team-based broadcast routines operate over all \acp{PE} in the provided team argument. All - \acp{PE} in the provided team must participate in the reduction. If a team created without + \acp{PE} in the provided team must participate in the operation. If a team created without support for collectives is passed to this or any other team collective routine, the behavior is undefined. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to this routine, the behavior is undefined.
@@ -117,7 +117,7 @@ Before any \ac{PE} calls a broadcast routine, the following conditions must be ensured: \begin{itemize} - \item The \dest{} array on all \acp{PE} \newtext{participating in the reduction} + \item The \dest{} array on all \acp{PE} \newtext{participating in the broadcast} \oldtext{in the active set} %% is ready to accept the broadcast data. \item \newtext{If using active-set-based routines,} the From cbeeb7264c3438a6e004e19d3a480fac5dcb13de Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 1 Nov 2018 13:17:56 -0500 Subject: [PATCH 064/319] Added description of expectation of error handling --- content/error_handling.tex | 10 ++++++++++ main_spec.tex | 6 ++++-- 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 content/error_handling.tex diff --git a/content/error_handling.tex b/content/error_handling.tex new file mode 100644 index 000000000..b5d5e3dd1 --- /dev/null +++ b/content/error_handling.tex @@ -0,0 +1,10 @@ +In many cases, \openshmem routines will guarantee the correct completion of operations without any need for programs to check for error states, diagnose system problems, or retry operations. For example, there are no error codes returned for remote memory operations. The implementation is expected to internally attempt any feasible checking and recovery to best guarantee completion as specified. However, there are also cases where routines return error codes to allow programs to detect problems that may be correctable at the application layer, e.g. invalid arguments to routines or requests for system resources that cannot be fulfilled at runtime. + +\CorCpp routines that return an integer error code follow the convention that \CONST{0} indicates successful local completion of the operation. 
This is considered a best effort of the implementation to indicate that all required local operations have been performed correctly inside the routine and the internal \openshmem state on the calling \ac{PE} is consistent with the description of the routine and its arguments upon completion. Implementations may use an integer return value from a routine to define other integer error codes specific to the implementation as long as those codes are not already explicitly defined for that routine. + +Because \openshmem defines asynchronous communication operations, errors may not arise until sometime after the error-generating routine has returned control to the calling program. In these cases, the implementation might abort the application with an informative message or take other appropriate actions. So, a return value indicating success in a routine cannot be considered a guarantee that the routine will complete all future actions successfully. + +Collective operations involving many \acp{PE} may return values indicating success while other \acp{PE} are still executing the collective operation. So, return values indicating success of a collective routine on one \ac{PE} do not by default indicate that all \acp{PE} involved in the collective operation will return from the routine successfully. If a collective routine provides any such global error checking behavior, it will be explicitly stated in the description of that routine. + +\openshmem implementations for high performance production environments may do minimal internal error checking for performance reasons. Also, some errors may be undetectable or uncorrectable due to limitations in underlying technologies, resulting in undefined behaviors. For these reasons, \openshmem does not provide strict error checking guarantees in most cases. 
For \CorCpp routines returning integer error codes, implementations are expected to detect certain error conditions as explicitly defined in individual routines, then make best efforts to detect other problems in routine completion and return implementation specific nonzero error codes where feasible. + diff --git a/main_spec.tex b/main_spec.tex index d03a3ff5f..bb0a03d2f 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -34,8 +34,10 @@ \section{Library Handles}\label{subsec:library_handles} \section{Environment Variables }\label{subsec:environment_variables} \input{content/environment_variables} - - +{\color{Green} +\section{Error Handling}\label{subsec:error_handling} +\input{content/error_handling} +} \clearpage From 78633a294bc503228e81eab0bf1b72b34090dadc Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 2 Nov 2018 10:37:23 -0500 Subject: [PATCH 065/319] Deprecate shmem_barrier, shmem_barrier_all --- content/shmem_barrier.tex | 11 ++++++++++- content/shmem_barrier_all.tex | 8 ++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/content/shmem_barrier.tex b/content/shmem_barrier.tex index ad02a8cbb..c9ec30512 100644 --- a/content/shmem_barrier.tex +++ b/content/shmem_barrier.tex @@ -1,3 +1,4 @@ +\begin{DeprecateBlock} \apisummary{ Performs all operations described in the \FUNC{shmem\_barrier\_all} interface but with respect to a subset of \acp{PE} defined by the active set. @@ -38,7 +39,7 @@ \apidescription{ \FUNC{shmem\_barrier} is a collective synchronization routine over an - active set. Control returns from \FUNC{shmem\_barrier} after all \acp{PE} in + active set. Control returns from \FUNC{shmem\_barrier} after all \acp{PE} in the active set (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size}) have called \FUNC{shmem\_barrier}. @@ -56,6 +57,13 @@ The same \VAR{pSync} array may be reused on consecutive calls to \FUNC{shmem\_barrier} if the same active set is used. 
+ +{\color{Green} + \FUNC{shmem\_barrier} has been deprecated in favor of the equivalent + call to \FUNC{shmem\_quiet} followed by a call to + \FUNC{shmem\_sync} on a team or active set with the desired + set of \acp{PE}. +} } \apireturnvalues{ @@ -99,3 +107,4 @@ \end{apiexamples} \end{apidefinition} +\end{DeprecateBlock} diff --git a/content/shmem_barrier_all.tex b/content/shmem_barrier_all.tex index 8392054bf..9f405069f 100644 --- a/content/shmem_barrier_all.tex +++ b/content/shmem_barrier_all.tex @@ -1,3 +1,4 @@ +\begin{DeprecateBlock} \apisummary{ Registers the arrival of a \ac{PE} at a barrier and blocks the \ac{PE} until all other \acp{PE} arrive at the barrier and all local @@ -33,6 +34,12 @@ \ac{RMA} routine calls such as \FUNC{shmem\_int\_add}, \FUNC{shmem\_put32}, \FUNC{shmem\_put\_nbi}, and \FUNC{shmem\_get\_nbi}. + +{\color{Green} + \FUNC{shmem\_barrier\_all} has been deprecated in favor of the equivalent + call to \FUNC{shmem\_quiet} followed by a call to + \FUNC{shmem\_sync(SHMEM\_TEAM\_WORLD)}. +} } \apireturnvalues{ @@ -59,3 +66,4 @@ \end{apiexamples} \end{apidefinition} +\end{DeprecateBlock} From c36bd24b2bfbc6df59033fb5660fdbd2292d5428 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 2 Nov 2018 10:38:05 -0500 Subject: [PATCH 066/319] Update description of shmem_sync adding shmem_team_sync --- content/shmem_sync.tex | 67 ++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 1fa319d58..acd51db97 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,33 +1,34 @@ \apisummary{ - Performs all operations described in the \FUNC{shmem\_sync\_all} interface - but with respect to a subset of \acp{PE} defined by \oldtext{the} - \newtext{a team or} active set.
+ \newtext{Registers the arrival of a \ac{PE} at a synchronization point and suspends \ac{PE} + execution until all other \acp{PE} in a given \openshmem team or active set + arrive at the same synchronization point.} } \begin{apidefinition} {\color{ForestGreen} \begin{C11synopsis} -void @\FuncDecl{shmem\_sync}@(shmem_team_t team); +int @\FuncDecl{shmem\_sync}@(shmem_team_t team); \end{C11synopsis} -} -% These separate synopses should be merged when the color highlighting removed, -% preferably with shmem_ctx_barrier listed first. \begin{Csynopsis} -void @\FuncDecl{shmem\_sync}@(int PE_start, int logPE_stride, int PE_size, long *pSync); +int @\FuncDecl{shmem\_team\_sync}@(shmem_team_t team); \end{Csynopsis} -{\color{ForestGreen} +} + +\begin{DeprecateBlock} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_sync}@(shmem_team_t team); +void @\FuncDecl{shmem\_sync}@(int PE_start, int logPE_stride, int PE_size, long *pSync); \end{CsynopsisCol} -} +\end{DeprecateBlock} \begin{apiarguments} \newtext{% \apiargument{IN}{team}{The team over which to perform the operation.}% } + +\begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between @@ -39,23 +40,36 @@ of type \CTYPE{long} and size \CONST{SHMEM\_BARRIER\_SYNC\_SIZE}. Every element of this array must be initialized to \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set enter \FUNC{shmem\_sync} the first time.} +\end{DeprecateBlock} \end{apiarguments} \apidescription{ \FUNC{shmem\_sync} is a collective synchronization routine over - \newtext {a team or} an active set. - Control returns from \FUNC{shmem\_sync} after all \acp{PE} in - the \newtext{specified team or} active set \oldtext{(specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and - \VAR{PE\_size})} have called \FUNC{shmem\_sync}. 
- \newtext{ - An active set is specified by the triple of values: \VAR{PE\_start}, - \VAR{logPE\_stride}, and \VAR{PE\_size}. - } + \newtext{an existing \openshmem team or} an active set. + +{\color{Green} + The routine registers the arrival of a \ac{PE} at a synchronization point in the program. + This is a fast mechanism for synchronizing all \acp{PE} that participate in this + collective call. The routine blocks the calling \ac{PE} until all \acp{PE} in the + specified team or active set have called \FUNC{shmem\_sync}. In a multithreaded \openshmem + program, only the calling thread is blocked. + + Team-based sync routines operate over all \acp{PE} in the provided team argument. All + \acp{PE} in the provided team must participate in the sync operation. If a team created without + support for collectives is passed to this or any other team collective routine, the + behavior is undefined. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} + is passed to this routine, the behavior is undefined. + + Active-set-based sync routines operate over all \acp{PE} in the active set + defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. +} - As with all \oldtext{\openshmem} \newtext{active set-based} collective routines, each of these routines assumes + As with all \oldtext{\openshmem} \newtext{active set-based} collective routines, + each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in - the active set calls an \openshmem collective routine, the behavior is undefined. + the active set calls an \oldtext{\openshmem} \newtext{active set-based} collective routine, + the behavior is undefined. The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same @@ -70,10 +84,19 @@ } \apireturnvalues{ - None. + \newtext{Zero on successful local completion.
Nonzero otherwise.} } \apinotes{ + +\newtext{% + There are no specifically defined error codes for sync operations. + See Section~\ref{subsec:error_handling} for expected error checking and + return code behavior specific to implementations. For portable + error checking and debugging behavior, programs should do their own checks + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL}. + } + If the \VAR{pSync} array is initialized at run time, another method of synchronization (e.g., \FUNC{shmem\_sync\_all}) must be used before the initial use of that \VAR{pSync} array by \FUNC{shmem\_sync}. From 9e79550db072dcc31bb56f4eb31db87735460a48 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 2 Nov 2018 10:38:31 -0500 Subject: [PATCH 067/319] Deprecate shmem_sync_all --- content/shmem_sync_all.tex | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/content/shmem_sync_all.tex b/content/shmem_sync_all.tex index 8d6c95244..6179796c9 100644 --- a/content/shmem_sync_all.tex +++ b/content/shmem_sync_all.tex @@ -1,6 +1,7 @@ +\begin{DeprecateBlock} \apisummary{ - Registers the arrival of a \ac{PE} at a barrier and suspends \ac{PE} - execution until all other \acp{PE} arrive at the barrier. + \newtext{Performs all operations described in the \FUNC{shmem\_sync} interface + but implicitly operates on \LibConstRef{SHMEM\_TEAM\_WORLD}.} } \begin{apidefinition} @@ -16,16 +17,19 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_sync\_all} routine registers the arrival of a \ac{PE} at a - barrier. Barriers are a fast mechanism for synchronizing all \acp{PE} at - once. This routine blocks the \ac{PE} until all \acp{PE} have called - \FUNC{shmem\_sync\_all}. In a multithreaded \openshmem +{\color{Green} + This routine blocks the \ac{PE} until all \acp{PE} in the \openshmem + program have called \FUNC{shmem\_sync\_all}. In a multithreaded \openshmem program, only the calling thread is blocked.
In contrast with the \FUNC{shmem\_barrier\_all} routine, \FUNC{shmem\_sync\_all} only ensures completion and visibility of previously issued memory stores and does not ensure completion of remote memory updates issued via \openshmem routines. + + The \FUNC{shmem\_sync\_all} routine is deprecated in favor of the equivalent call to + \FUNC{shmem\_sync(SHMEM\_TEAM\_WORLD)}. +} } \apireturnvalues{ @@ -33,11 +37,8 @@ } \apinotes{ - The \FUNC{shmem\_sync\_all} routine can be used to portably ensure that - memory access operations observe remote updates in the order enforced by the - initiator \acp{PE}, provided that the initiator PE ensures completion of remote - updates with a call to \FUNC{shmem\_quiet} prior to the call to the - \FUNC{shmem\_sync\_all} routine. + None. } \end{apidefinition} +\end{DeprecateBlock} From ce1620c5ffde91075dc8a50ab1e0ed08ba42a523 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 2 Nov 2018 10:39:02 -0500 Subject: [PATCH 068/319] Swap order of shmem_sync and shmem_sync_all in doc --- main_spec.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main_spec.tex b/main_spec.tex index bb0a03d2f..9c60c3537 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -325,12 +325,12 @@ \subsubsection{\textbf{SHMEM\_BARRIER\_ALL}}\label{subsec:shmem_barrier_all} \subsubsection{\textbf{SHMEM\_BARRIER}}\label{subsec:shmem_barrier} \input{content/shmem_barrier.tex} -\subsubsection{\textbf{SHMEM\_SYNC\_ALL}}\label{subsec:shmem_sync_all} -\input{content/shmem_sync_all.tex} - \subsubsection{\textbf{SHMEM\_SYNC}}\label{subsec:shmem_sync} \input{content/shmem_sync.tex} +\subsubsection{\textbf{SHMEM\_SYNC\_ALL}}\label{subsec:shmem_sync_all} +\input{content/shmem_sync_all.tex} + \subsubsection{\textbf{SHMEM\_TEAM\_BROADCAST}}\label{subsec:shmem_team_broadcast} \input{content/shmem_team_broadcast.tex} From 46318dd072086ef9ac21272b358fa28feaf9f120 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 2 Nov 2018 16:11:49 -0500 Subject: 
[PATCH 069/319] Added return codes to collect routines --- content/shmem_collect.tex | 44 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 3ae5a106d..b7a456461 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -8,10 +8,10 @@ %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_collect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -void @\FuncDecl{shmem\_collect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -void @\FuncDecl{shmem\_fcollect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -void @\FuncDecl{shmem\_fcollect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_collect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_collect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_fcollect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_fcollect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); \end{C11synopsis} } @@ -19,10 +19,10 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_collect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -void @\FuncDecl{shmem\_team\_collect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -void @\FuncDecl{shmem\_team\_fcollect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -void @\FuncDecl{shmem\_team\_fcollect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_team\_collect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_team\_collect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int 
@\FuncDecl{shmem\_team\_fcollect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_team\_fcollect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -83,13 +83,13 @@ \end{apiarguments} -\apidescription{ +\apidescription{ +{\color{Green} \openshmem \FUNC{collect} and \FUNC{fcollect} routines concatenate \VAR{nelems} \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the - \dest{} array, over \newtext{an \openshmem team or active set} - \oldtext{the set of \acp{PE} defined by \VAR{PE\_start}, \VAR{log2PE\_stride}, and \VAR{PE\_size},} %% + \dest{} array, over an \openshmem team or active set in processor number order. The resultant \dest{} array contains the contribution from - {\color{Green} \acp{PE} as follows: + \acp{PE} as follows: \begin{itemize} \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the @@ -100,13 +100,14 @@ The collected result is written to the \dest{} array for all \acp{PE} that participate in the collective. The same \dest{} and \source{} - arrays must be passed by all \acp{PE} that participate in the collective. } + arrays must be passed by all \acp{PE} that participate in the collective. +} The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to vary from \ac{PE} to \ac{PE}. - {\color{Green} +{\color{Green} Team-based collect routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in the collective. If a team created without support for collectives is passed to this or any other team collective routine, the @@ -114,10 +115,11 @@ Active-set-based broadcast routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. 
- As with all \newtext{active-set-based} collective routines, + As with all active-set-based collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the - active set and calls this collective routine, the behavior is undefined. } + active set calls this collective routine, the behavior is undefined. +} The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. The same @@ -147,10 +149,18 @@ } \apireturnvalues{ - None. + \newtext{Zero on successful local completion. Nonzero otherwise.} } \apinotes{ +\newtext{% + There are no specifically defined error codes for sync operations. + See section \ref{subsec:error_handling} for expected error checking and + return code behavior specific to implementations. For portable + error checking and debugging behavior, programs should do their own checks + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL}. +} + All \openshmem collective routines reset the values in \VAR{pSync} before they return, so a particular \VAR{pSync} buffer need only be initialized the first time it is used. 
From 3b54a63ae74784b56b6565b7ed82ec9965d871b9 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 5 Nov 2018 10:39:35 -0600 Subject: [PATCH 070/319] Added return codes for reduction routines --- content/shmem_reductions.tex | 71 ++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 41744c9a9..b1d65b73e 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -5,8 +5,10 @@ \begin{apidefinition} + \begin{table}[h] \begin{center} +{\color{Green} \begin{tabular}{|l|l|l|l|l|} \hline \TYPE & \TYPENAME & \multicolumn{3}{c|}{Operations Supporting \TYPE}\\ \hline @@ -22,16 +24,18 @@ \end{tabular} \TableCaptionRef{Reduction Types, Names and Supporting Operations} \label{reducetypes} +} \end{center} \end{table} + \paragraph{AND} Performs a bitwise AND reduction across a set of \acp{PE}.\newline %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the AND operation as specified by Table \ref{reducetypes}. 
} @@ -41,7 +45,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -64,7 +68,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the OR operation as specified by Table \ref{reducetypes}. } @@ -74,7 +78,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -97,7 +101,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the XOR operation as specified by Table \ref{reducetypes}. 
} @@ -107,7 +111,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -130,7 +134,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{C11synopsis} where \TYPE{} is one of the integer or real types supported for the MAX operation as specified by Table \ref{reducetypes}. } @@ -140,7 +144,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -165,7 +169,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{C11synopsis} where \TYPE{} is one of the integer or real types supported for the MIN operation as specified by Table \ref{reducetypes}. 
} @@ -175,7 +179,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -200,7 +204,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation as specified by Table \ref{reducetypes}. } @@ -210,7 +214,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -237,7 +241,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -void @\FuncDecl{shmem\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the PROD operation as specified by Table \ref{reducetypes}. 
} @@ -247,7 +251,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -void @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -330,27 +334,22 @@ must be passed to all \acp{PE} \newtext{participating in the reduction.} \oldtext{in the active set.} %% - {\color{Green} +{\color{Green} Team-based reduction routines operate over all \acp{PE} in the provided team argument. All - \acp{PE} in the provided team must participate in the reduction. - - If the team has been created with the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, - it will not have the required support structures to complete this routine. If - such a team is passed to this or any other team collective routine, the behavior - is undefined. - } + \acp{PE} in the provided team must participate in the reduction. If a team created without + support for collectives is passed to this or any other team collective routine, the + behavior is undefined. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} + is passed to this routine, the behavior is undefined. - \newtext{Active-set-based reduction routines operate over all \acp{PE} in} the active set - \oldtext{is} %% + Active-set-based reduction routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. - - As with all \newtext{active-set-based} - \oldtext{\openshmem} %% - collective routines, each of these routines assumes +} + + As with all \oldtext{\openshmem} \newtext{active-set-based} collective routines, + each of these routines assumes that only \acp{PE} in the active set call the routine. 
If a \ac{PE} not in - the active set calls an \newtext{active-set-based} - \oldtext{\openshmem} %% - collective routine, the behavior is undefined. + the active set calls an \oldtext{\openshmem} \newtext{active set-based} collective routine, + the behavior is undefined. The values of arguments \VAR{nreduce}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \acp{PE} in the active set. @@ -426,10 +425,18 @@ \apireturnvalues{ - None. + \newtext{Zero on successful local completion. Nonzero otherwise.} } \apinotes{ +\newtext{% + There are no specifically defined error codes for this routine. + See section \ref{subsec:error_handling} for expected error checking and + return code behavior specific to implementations. For portable + error checking and debugging behavior, programs should do their own checks + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL} + } + All \openshmem reduction routines reset the values in \VAR{pSync} before they return, so a particular \VAR{pSync} buffer need only be initialized the first time it is used. The user must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} From f9e1b06376c9279baa109da01b4d0b5291029380 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 5 Nov 2018 10:42:01 -0600 Subject: [PATCH 071/319] Fix typo in collectives API notes --- content/shmem_collect.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index b7a456461..e6487f494 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -154,7 +154,7 @@ \apinotes{ \newtext{% - There are no specifically defined error codes for sync operations. + There are no specifically defined error codes for these routines. See section \ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. 
For portable error checking and debugging behavior, programs should do their own checks From ba86f3e1a5be0f78b0127d5d61710fd701986145 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 5 Nov 2018 10:57:52 -0600 Subject: [PATCH 072/319] Update notes in broadcast to match error checking section --- content/shmem_broadcast.tex | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 6f63ef9af..98c7a33ec 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -105,10 +105,11 @@ defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. } - As with all \newtext{active-set-based} \openshmem collective routines, + As with all \newtext{active-set-based} \oldtext{\openshmem} collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the - active set calls an \openshmem collective routine, the behavior is undefined. + active set calls an \newtext{active-set-based} \oldtext{\openshmem} + collective routine, the behavior is undefined. The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. @@ -151,10 +152,18 @@ \CorCpp{} structures are allowed.} \apireturnvalues{ - Integer error code with currently undefined behavior. + \newtext{Zero on successful local completion. Nonzero otherwise.} } \apinotes{ +\newtext{% + There are no specifically defined error codes for these routines. + See section \ref{subsec:error_handling} for expected error checking and + return code behavior specific to implementations. For portable + error checking and debugging behavior, programs should do their own checks + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL} + } + All \openshmem broadcast routines restore \VAR{pSync} to its original contents. 
Multiple calls to \openshmem routines that use the same \VAR{pSync} array do not require that \VAR{pSync} be reinitialized after the first call. From 215a0b39bb865023975aeb56030781f792f55f9d Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 5 Nov 2018 13:19:54 -0600 Subject: [PATCH 073/319] Add text to collectives intro for teams routines --- content/collective_intro.tex | 99 +++++++++++++++++++++++++++++++----- 1 file changed, 85 insertions(+), 14 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 3e15b888f..5365794f4 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -1,39 +1,97 @@ -\emph{Collective routines} are defined as communication or synchronization -operations on a group of \acp{PE} called an active set. The collective -routines require all \acp{PE} in the active set to simultaneously call the +\emph{Collective routines} are defined as \newtext{coordinated} communication or synchronization +operations \oldtext{on} \newtext{performed by} a group of \acp{PE} \oldtext{called an active set}. + +{\color{Green} +\openshmem provides two types of collective routines: + +\begin{enumerate} +\item Collective routines that operate on teams use a team handle parameter to determine +which \acp{PE} will participate in the routine, and use resources encapsulated by the team object +to perform operations. See Section~\ref{subsec:team} for details on team management. +These routines will be the standard for \openshmem moving forward. +\item Collective routines that operate on active sets use a set of parameters to determine +which \acp{PE} will participate and what resources are used to perform operations. These routines +are the legacy API for collectives which will be deprecated and phased out of +implementations moving forward. 
+\end{enumerate} + +Collective routines with no team or active set parameters are deprecated, +and implicitly operate on the team consisting of all \acp{PE} in the computation, +\LibHandleRef{SHMEM\_TEAM\_WORLD}. + +The team-based collective routines are performed with respect to a valid +\openshmem team, which is specified by a team handle argument. +Team-based collective operations require all \acp{PE} in the team to call +the routine in order for the operation to complete. Team-based collective routines +should not be passed team handles to teams created with a configuration +that disables support for collective operations. If such a team +or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to a team-based collective +routine, the behavior is undefined. + +Team objects encapsulate the system resources required to complete team-based collective routines. +On completion of a team-based collective call, the team resources on the calling +\ac{PE} will be ready for the next collective call. However, other \acp{PE} in the +team may still be participating in the collective call, and therefore team +resources may still be in use on some \acp{PE} in the team after others have returned from +the collective routine. Before a subsequent call to a collective routine by the team, +the previous collective operation must be complete on all \acp{PE} in the team, +which can be ensured by a call to a synchronization routine, like \FUNC{shmem\_sync}, +by the team. 
+ +The team-based collective routines defined in the \openshmem Specification are: + +\begin{itemize} +\item \FUNC{shmem\_team\_sync} +\item \FUNC{shmem\_team\_broadcast\{32, 64\}} +\item \FUNC{shmem\_team\_collect\{32, 64\}} +\item \FUNC{shmem\_team\_fcollect\{32, 64\}} +\item Reductions for the following operations: AND, MAX, MIN, SUM, PROD, OR, XOR +\item \FUNC{shmem\_team\_alltoall\{32, 64\}} +\item \FUNC{shmem\_team\_alltoalls\{32, 64\}} +\end{itemize} + +The deprecated function \FUNC{shmem\_sync\_all} is provided for backward compatibility to synchronize +all \acp{PE} in the computation. This should be replaced in applications by the equivalent +\FUNC{shmem\_sync(SHMEM\_TEAM\_WORLD)}. +} + +\begin{DeprecateBlock} +The \newtext{active-set-based} collective routines require all \acp{PE} +in the active set to simultaneously call the routine. A \ac{PE} that is not in the active set calling the collective -routine results in undefined behavior. All collective routines have an +routine results in undefined behavior. \oldtext{All collective routines have an active set as an input parameter except \FUNC{shmem\_barrier\_all} and \FUNC{shmem\_sync\_all}. Both \FUNC{shmem\_barrier\_all} and -\FUNC{shmem\_sync\_all} must be called by all \acp{PE} of the \openshmem program. +\FUNC{shmem\_sync\_all} must be called by all \acp{PE} of the \openshmem program.} The active set is defined by the arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size}. \VAR{PE\_start} specifies the starting \ac{PE} number and -is the lowest numbered PE in the active set. The stride between successive +is the lowest numbered \ac{PE} in the active set. The stride between successive \acp{PE} in the active set is $2^{logPE\_stride}$ and \VAR{logPE\_stride} must be greater than or equal to zero. \VAR{PE\_size} specifies the number of \acp{PE} in the active set and must be greater than zero. 
The active set must satisfy the requirement that its last member corresponds to a valid \ac{PE} number, that is $0 \le PE\_start + (PE\_size - 1) * 2^{logPE\_stride} < npes$. -All \acp{PE} participating in the collective routine must provide the same + +All \acp{PE} participating in the \newtext{active-set-based} collective routine must provide the same values for these arguments. If any of these requirements are not met, the behavior is undefined. -Another argument important to collective routines is \VAR{pSync}, which is a -symmetric work array. All \acp{PE} participating in a collective must pass the -same \VAR{pSync} array. On completion of a collective call, the \VAR{pSync} is +Another argument important to \newtext{active-set-based} collective routines is \VAR{pSync}, which is a +symmetric work array. All \acp{PE} participating in an \newtext{active-set-based} collective must pass the +same \VAR{pSync} array. On completion of \newtext{such} a collective call, the \VAR{pSync} is restored to its original contents. The user is permitted to reuse a \VAR{pSync} array if all previous collective routines using the \VAR{pSync} array have been -completed by all participating \acp{PE}. One can use a synchronization -collective routine such as \FUNC{shmem\_barrier} to ensure completion of previous collective +completed by all participating \acp{PE}. One can use a synchronization +collective routine such as \FUNC{shmem\_barrier} to ensure completion of previous \newtext{active-set-based} collective routines. The \FUNC{shmem\_barrier} and \FUNC{shmem\_sync} routines allow the same \VAR{pSync} array to be used on consecutive calls as long as the \acp{PE} in the active set do not change. All collective routines defined in the Specification are blocking. The -collective routines return on completion. The collective routines defined in -the \openshmem Specification are: +collective routines return on completion. 
The \newtext{active-set-based} collective +routines defined in the \openshmem Specification are: \begin{itemize} \item \FUNC{shmem\_barrier\_all} @@ -47,3 +105,16 @@ \item \FUNC{shmem\_alltoall\{32, 64\}} \item \FUNC{shmem\_alltoalls\{32, 64\}} \end{itemize} + +{\color{Green} +The active-set-based \FUNC{shmem\_barrier} routine has been deprecated and +no team-based barrier routines will be defined. In future, the behavior +previously provided by \FUNC{shmem\_barrier} should be realized by first calling +\FUNC{shmem\_ctx\_quiet} on any relevant communication contexts followed by a call +to \FUNC{shmem\_sync} by some \openshmem team. + +Calls to \FUNC{shmem\_barrier\_all} +should be replaced with a call to quiet the default communication context followed +by a call to \FUNC{shmem\_sync} by \LibHandleRef{SHMEM\_TEAM\_WORLD}. +} +\end{DeprecateBlock} From 54fe60a8a64d94eeec3f0823d92132b7774de8b3 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 6 Nov 2018 11:22:20 -0600 Subject: [PATCH 074/319] Update description of alltoall for teams --- content/shmem_alltoall.tex | 101 +++++++++++++++++++++++++++++-------- 1 file changed, 81 insertions(+), 20 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 35e3d5108..ce68d0f94 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -1,14 +1,32 @@ \apisummary{ - shmem\_alltoall is a collective routine where each \ac{PE} exchanges a fixed amount of data with all other \acp{PE} in the - active set. + shmem\_alltoall is a collective routine where each \ac{PE} exchanges a fixed amount of data with all other \acp{PE} \oldtext{in the active set} \newtext{participating in the collective}. 
} \begin{apidefinition} +%% C11 +{\color{Green} +\begin{C11synopsis} +int @\FuncDecl{shmem\_alltoall32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_alltoall64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +\end{C11synopsis} +} + \begin{Csynopsis} +\end{Csynopsis} +{\color{Green} +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_team\_alltoall32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_team\_alltoall64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +\end{CsynopsisCol} +} + +\begin{DeprecateBlock} +\begin{CsynopsisCol} void @\FuncDecl{shmem\_alltoall32}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); void @\FuncDecl{shmem\_alltoall64}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -\end{Csynopsis} +\end{CsynopsisCol} +\end{DeprecateBlock} \begin{Fsynopsis} INTEGER pSync(SHMEM_ALLTOALL_SYNC_SIZE) @@ -28,6 +46,13 @@ \apiargument{IN}{nelems}{The number of elements to exchange for each \ac{PE}. \VAR{nelems} must be of type size\_t for \CorCpp. When using \Fortran, it must be a default integer value.} + +\newtext{% +\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been + created without disabling support for collective operations.} +} + +\begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} @@ -44,49 +69,77 @@ Every element of this array must be initialized with the value \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set enter the routine.} +\end{DeprecateBlock} \end{apiarguments} \apidescription{ The \FUNC{shmem\_alltoall} routines are collective routines. 
Each \ac{PE} - in the active set exchanges \VAR{nelems} data elements of size + \oldtext{in the active set} \newtext{participating in the operation} + exchanges \VAR{nelems} data elements of size 32 bits (for \FUNC{shmem\_alltoall32}) or 64 bits (for \FUNC{shmem\_alltoall64}) - with all other \acp{PE} in the set. The data being sent and received are + with all other \acp{PE} \oldtext{in the set} + \newtext{participating in the operation}. The data being sent and received are stored in a contiguous symmetric data object. The total size of each \acp{PE} \VAR{source} object and \VAR{dest} object is \VAR{nelems} times the size of - an element (32 bits or 64 bits) times \VAR{PE\_size}. - The \VAR{source} object contains \VAR{PE\_size} blocks of data (the size of each - block defined by \VAR{nelems}) and each block of data is sent to a different \ac{PE}. - Given a \ac{PE} \VAR{i} that is the \kth PE in the active set and a \ac{PE} - \VAR{j} that is the \lth \ac{PE} in the active set, + an element (32 bits or 64 bits) times \oldtext{\VAR{PE\_size}} + \newtext{\VAR{N}, where \VAR{N} equals the number of \acp{PE} participating + in the operation}. + The \VAR{source} object contains \oldtext{\VAR{PE\_size}} \newtext{\VAR{N}} blocks of data + (where the size of each block is defined by \VAR{nelems}) and each block of data + is sent to a different \ac{PE}. + + \newtext{The same \dest{} and \source{} + arrays, and the same value for \VAR{nelems}, + must be passed by all \acp{PE} that participate in the collective.} + + Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} \oldtext{in the active set} + \newtext{participating in the operation} and a \ac{PE} + \VAR{j} that is the \lth \ac{PE} \oldtext{in the active set} + \newtext{participating in the operation}, \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to the \kth block of the \VAR{dest} object of \ac{PE} \VAR{j}. 
- As with all \openshmem collective routines, this routine assumes - that only \acp{PE} in the active set call the routine. If a \ac{PE} not - in the active set calls an \openshmem collective routine, +{\color{Green} + Team-based alltoall routines operate over all \acp{PE} in the provided team + argument. All \acp{PE} in the provided team must participate in the collective. + If a team created without support for collectives is passed to this or any other + team collective routine, the behavior is undefined. + + Active-set-based collective routines operate over all \acp{PE} in the active set + defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. +} + As with all \oldtext{\openshmem} \newtext{active-set-based} collective routines, + this routine assumes that only \acp{PE} in the active set call the routine. + If a \ac{PE} not in the active set calls an \oldtext{\openshmem} + \newtext{active-set-based} collective routine, the behavior is undefined. - The values of arguments \VAR{nelems}, \VAR{PE\_start}, \VAR{logPE\_stride}, + The values of arguments \oldtext{\VAR{nelems},} \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same - \VAR{dest} and \VAR{source} data objects, and the same \VAR{pSync} work + \oldtext{\VAR{dest} and \VAR{source} data objects, and the same} \VAR{pSync} work array must be passed to all \acp{PE} in the active set. Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following conditions must be ensured: \begin{itemize} - \item The \VAR{pSync} array on all \acp{PE} in the active set is not - still in use from a prior call to a \FUNC{shmem\_alltoall} routine. \item The \VAR{dest} data object on all \acp{PE} in the active set is ready to accept the \FUNC{shmem\_alltoall} data. 
+ \item \newtext{For active-set-based routines}, the \VAR{pSync} array + on all \acp{PE} in the active set is not still in use from a prior call + to a \FUNC{shmem\_alltoall} routine. \end{itemize} Otherwise, the behavior is undefined. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for - the local PE: Its \VAR{dest} symmetric data object is completely updated and + the local PE: + \begin{itemize} + \item Its \VAR{dest} symmetric data object is completely updated and the data has been copied out of the \VAR{source} data object. - The values in the \VAR{pSync} array are restored to the original values. + \item \newtext{For active-set-based routines, } + the values in the \VAR{pSync} array are restored to the original values. + \end{itemize} } \apidesctable{ @@ -98,10 +151,18 @@ \apitablerow{shmem\_alltoall32}{\CONST{32} bits aligned.} \apireturnvalues{ - None. + \newtext{Zero on successful local completion. Nonzero otherwise.} } \apinotes{ +\newtext{% + There are no specifically defined error codes for these routines. + See section \ref{subsec:error_handling} for expected error checking and + return code behavior specific to implementations. For portable + error checking and debugging behavior, programs should do their own checks + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL}. +} + This routine restores \VAR{pSync} to its original contents. Multiple calls to \openshmem\ routines that use the same \VAR{pSync} array do not require that \VAR{pSync} be reinitialized after the first call. 
From 02ff9cca641ff260512c11336d78fc948cdc66f0 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 6 Nov 2018 11:22:31 -0600 Subject: [PATCH 075/319] Update description of alltoalls for teams --- content/shmem_alltoalls.tex | 113 +++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 54 deletions(-) diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index 0f2aa47da..3ac07b4e8 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -1,14 +1,32 @@ \apisummary{ - shmem\_alltoalls is a collective routine where each \ac{PE} exchanges a fixed amount of strided data with all other - \acp{PE} in the active set. + shmem\_alltoalls is a collective routine where each \ac{PE} exchanges a fixed amount of strided data with all other \acp{PE} \oldtext{in the active set} \newtext{participating in the collective}. } \begin{apidefinition} +%% C11 +{\color{Green} +\begin{C11synopsis} +int @\FuncDecl{shmem\_alltoalls32}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_alltoalls64}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, shmem_team_t team); +\end{C11synopsis} +} + \begin{Csynopsis} +\end{Csynopsis} +{\color{Green} +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_team\_alltoalls32}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_team\_alltoalls64}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, shmem_team_t team); +\end{CsynopsisCol} +} + +\begin{DeprecateBlock} +\begin{CsynopsisCol} void @\FuncDecl{shmem\_alltoalls32}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); void @\FuncDecl{shmem\_alltoalls64}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long 
*pSync); -\end{Csynopsis} +\end{CsynopsisCol} +\end{DeprecateBlock} \begin{Fsynopsis} INTEGER pSync(SHMEM_ALLTOALLS_SYNC_SIZE) @@ 
-36,6 +54,13 @@ A value of \CONST{1} indicates contiguous data. \VAR{sst} must be of type \CTYPE{ptrdiff\_t}. When using \Fortran, it must be a default integer value.} + +\newtext{% +\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been + created without disabling support for collective operations.} +} + +\begin{DeprecateBlock} \apiargument{IN}{nelems}{The number of elements to exchange for each \ac{PE}. \VAR{nelems} must be of type size\_t for \CorCpp. When using \Fortran, it must be a default integer value.} @@ -55,75 +80,55 @@ Every element of this array must be initialized with the value \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set enter the routine.} - +\end{DeprecateBlock} + \end{apiarguments} \apidescription{ - The \FUNC{shmem\_alltoalls} routines are collective routines. Each \ac{PE} - in the active set exchanges \VAR{nelems} strided data elements of size + The \FUNC{shmem\_alltoalls} routines are collective routines. + \newtext{These routines are equivalent in functionality to the corresponding + \FUNC{shmem\_alltoall} routines except that they add explicit stride values + for accessing the source and destination data arrays, whereas the array + access in \FUNC{shmem\_alltoall} is always with a stride of \CONST{1}.} + + Each \ac{PE} \oldtext{in the active set} \newtext{participating in the operation} + exchanges \VAR{nelems} strided data elements of size 32 bits (for \FUNC{shmem\_alltoalls32}) or 64 bits (for \FUNC{shmem\_alltoalls64}) - with all other \acp{PE} in the set. Both strides, \VAR{dst} and \VAR{sst}, must be greater + with all other \acp{PE} \oldtext{in the set} \newtext{participating in the operation}. + Both strides, \VAR{dst} and \VAR{sst}, must be greater than or equal to \CONST{1}. 
- Given a \ac{PE} \VAR{i} that is the \kth PE in the active set and a \ac{PE} - \VAR{j} that is the \lth \ac{PE} in the active set, + + \newtext{The same \dest{} and \source{} arrays and the same values for + arguments \VAR{dst}, \VAR{sst}, and \VAR{nelems} must be passed by all \acp{PE} + that participate in the collective.} + + Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} \oldtext{in the active set} + \newtext{participating in the operation} and a \ac{PE} + \VAR{j} that is the \lth \ac{PE} \oldtext{in the active set} + \newtext{participating in the operation}, \ac{PE} \VAR{i} sends the \VAR{sst}*\lth block of the \VAR{source} data object to the \VAR{dst}*\kth block of the \VAR{dest} data object on \ac{PE} \VAR{j}. - As with all \openshmem collective routines, these routines assume - that only \acp{PE} in the active set call the routine. If a \ac{PE} not - in the active set calls an \openshmem collective routine, undefined - behavior results. - - The values of arguments \VAR{dst}, \VAR{sst}, \VAR{nelems}, \VAR{PE\_start}, - \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \acp{PE} in the - active set. The same \VAR{dest} and \VAR{source} data objects, and the same - \VAR{pSync} work array must be passed to all \acp{PE} in the active set. - - Before any \ac{PE} calls a \FUNC{shmem\_alltoalls} routine, - the following conditions must be ensured: +{\color{Green} + See the description of \FUNC{shmem\_alltoall} in section + \ref{subsec:shmem_alltoall} for: \begin{itemize} - \item The \VAR{pSync} array on all \acp{PE} in the active set is not - still in use from a prior call to a \FUNC{shmem\_alltoall} routine. - \item The \VAR{dest} data object on all \acp{PE} in the active set is - ready to accept the \FUNC{shmem\_alltoalls} data. + \item Rules for \ac{PE} participation in the collective routine. + \item The pre- and post-conditions for symmetric objects. + \item Typing constraints for \dest{} and \source{} data objects.
\end{itemize} - Otherwise, the behavior is undefined. - - Upon return from a \FUNC{shmem\_alltoalls} routine, the following is true for - the local PE: Its \VAR{dest} symmetric data object is completely updated and - the data has been copied out of the \VAR{source} data object. - The values in the \VAR{pSync} array are restored to the original values. +} + } -\apidesctable{ -The \dest{} and \source{} data objects must conform to certain typing -constraints, which are as follows: -}{Routine}{Data type of \VAR{dest} and \VAR{source}} - -\apitablerow{shmem\_alltoalls64}{\CONST{64} bits aligned.} -\apitablerow{shmem\_alltoalls32}{\CONST{32} bits aligned.} \apireturnvalues{ - None. + \newtext{Zero on successful local completion. Nonzero otherwise.} } \apinotes{ - This routine restores \VAR{pSync} to its original contents. Multiple calls - to \openshmem\ routines that use the same \VAR{pSync} array do not require - that \VAR{pSync} be reinitialized after the first call. - The user must ensure that the \VAR{pSync} array is not being updated by any - \ac{PE} in the active set while any of the \acp{PE} participates in - processing of an \openshmem\ \FUNC{shmem\_alltoalls} routine. Be careful to - avoid these situations: If the \VAR{pSync} array is initialized at run time, - some type of synchronization is needed to ensure that all \acp{PE} in the - active set have initialized \VAR{pSync} before any of them enter an - \openshmem\ routine called with the \VAR{pSync} synchronization array. A - \VAR{pSync} array may be reused on a subsequent \openshmem\ - \FUNC{shmem\_alltoalls} routine only if none of the \acp{PE} in the - active set are still processing a prior \openshmem\ \FUNC{shmem\_alltoalls} - routine call that used the same \VAR{pSync} array. In general, this can be - ensured only by doing some type of synchronization. + \newtext{See notes for \FUNC{shmem\_alltoall} in section \ref{subsec:shmem_alltoall}}. 
} \begin{apiexamples} From f54588d17d19ce55b5d727ff8c13ce25ecdb8978 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 6 Nov 2018 11:36:43 -0600 Subject: [PATCH 076/319] fix typos in shmem_collect description --- content/shmem_collect.tex | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index e6487f494..fdebe6fd2 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -1,6 +1,6 @@ \apisummary{ Concatenates blocks of data from multiple \acp{PE} to an array in every - \ac{PE}. + \ac{PE} \newtext{participating in the collective routine}. } \begin{apidefinition} @@ -85,7 +85,8 @@ \apidescription{ {\color{Green} - \openshmem \FUNC{collect} and \FUNC{fcollect} routines concatenate \VAR{nelems} + \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective + operation to concatenate \VAR{nelems} \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the \dest{} array, over an \openshmem team or active set in processor number order. The resultant \dest{} array contains the contribution from @@ -99,8 +100,8 @@ \end{itemize} The collected result is written to the \dest{} array for all \acp{PE} - that participate in the collective. The same \dest{} and \source{} - arrays must be passed by all \acp{PE} that participate in the collective. + that participate in the operation. The same \dest{} and \source{} + arrays must be passed by all \acp{PE} that participate in the operation. } The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all @@ -109,11 +110,11 @@ {\color{Green} Team-based collect routines operate over all \acp{PE} in the provided team argument. All - \acp{PE} in the provided team must participate in the collective. If a team created without - support for collectives is passed to this or any other team collective routine, the + \acp{PE} in the provided team must participate in the operation. 
If a team created without + support for collective operations is passed to this or any other team collective routine, the behavior is undefined. - Active-set-based broadcast routines operate over all \acp{PE} in the active set + Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. As with all active-set-based collective routines, each of these routines assumes that @@ -130,7 +131,7 @@ \ac{PE}: \begin{itemize} \item The \dest{} array is updated and the \source{} array may be safely reused. - \item \newtext{For active-set-based collectives,} the values in the \VAR{pSync} array are + \item \newtext{For active-set-based collective routines,} the values in the \VAR{pSync} array are restored to the original values. \end{itemize} } From cf40ac46b7b834e3ec79eb92e2af5f5ce6063f0b Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 6 Dec 2018 11:44:11 -0600 Subject: [PATCH 077/319] Update NB AMO context definitions --- content/shmem_atomic_compare_swap_nbi.log | 2286 +++++++++++++++++++++ content/shmem_atomic_compare_swap_nbi.tex | 19 +- content/shmem_atomic_fetch_add_nbi.tex | 15 +- content/shmem_atomic_fetch_and_nbi.tex | 12 +- content/shmem_atomic_fetch_inc_nbi.tex | 11 +- content/shmem_atomic_fetch_nbi.tex | 8 +- content/shmem_atomic_fetch_or_nbi.tex | 12 +- content/shmem_atomic_fetch_xor_nbi.tex | 12 +- content/shmem_atomic_swap_nbi.tex | 15 +- 9 files changed, 2338 insertions(+), 52 deletions(-) create mode 100644 content/shmem_atomic_compare_swap_nbi.log diff --git a/content/shmem_atomic_compare_swap_nbi.log b/content/shmem_atomic_compare_swap_nbi.log new file mode 100644 index 000000000..1ac90f425 --- /dev/null +++ b/content/shmem_atomic_compare_swap_nbi.log @@ -0,0 +1,2286 @@ +This is pdfTeX, Version 3.14159265-2.6-1.40.16 (MiKTeX 2.9) (preloaded format=pdflatex 2016.4.29) 6 DEC 2018 11:36 +entering extended mode 
+**C:/Users/nravi/Documents/documents/wrk/src/std/nravi/content/shmem_atomic_com +pare_swap_nbi.tex + +(C:/Users/nravi/Documents/documents/wrk/src/std/nravi/content/shmem_atomic_comp +are_swap_nbi.tex +LaTeX2e <2015/10/01> patch level 2 +Babel <3.9n> and hyphenation patterns for 69 languages loaded. +! Undefined control sequence. +l.1 \color + {Green} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + + +! LaTeX Error: Missing \begin{document}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.1 \color{G + reen} +You're in trouble here. Try typing to proceed. +If that doesn't work, type X to quit. + +Missing character: There is no G in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +! Undefined control sequence. +l.2 \apisummary + { +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no T in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no o in font nullfont! 
+Missing character: There is no c in font nullfont! +Missing character: There is no k in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no g in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no a in font nullfont! 
+Missing character: There is no l in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no j in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no . in font nullfont! + +Overfull \hbox (20.0pt too wide) in paragraph at lines 1--6 +[] + [] + + +! LaTeX Error: Environment apidefinition undefined. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.7 \begin{apidefinition} + +Your command was ignored. +Type I to replace it with another command, +or to continue without it. + + +! LaTeX Error: Environment C11synopsis undefined. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.9 \begin{C11synopsis} + +Your command was ignored. +Type I to replace it with another command, +or to continue without it. + + +! LaTeX Error: Missing \begin{document}. 
+ +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.10 v + oid @\FuncDecl{shmem\_atomic\_compare\_swap\_nbi}@(TYPE *fetch, TYPE *... + +You're in trouble here. Try typing to proceed. +If that doesn't work, type X to quit. + +Missing character: There is no v in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no @ in font nullfont! +! Undefined control sequence. +l.10 void @\FuncDecl + {shmem\_atomic\_compare\_swap\_nbi}@(TYPE *fetch, TYPE *... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no s in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no a in font nullfont! 
+Missing character: There is no p in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no @ in font nullfont! +Missing character: There is no ( in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no * in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no * in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! 
+Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no v in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no ) in font nullfont! +Missing character: There is no ; in font nullfont! +Missing character: There is no v in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no @ in font nullfont! +! Undefined control sequence. +l.11 void @\FuncDecl + {shmem\_atomic\_compare\_swap\_nbi}@(shmem_ctx_t ctx, TY... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no s in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! 
+Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no @ in font nullfont! +Missing character: There is no ( in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +! Missing $ inserted. + + $ +l.11 ...shmem\_atomic\_compare\_swap\_nbi}@(shmem_ + ctx_t ctx, TYPE *fetch, TY... +I've inserted a begin-math/end-math symbol since I think +you left one out. Proceed, with fingers crossed. + +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <7> on input line 11. +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <5> on input line 11. + +! LaTeX Error: \begin{document} ended by \end{C11synopsis}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.12 \end{C11synopsis} + +Your command was ignored. +Type I to replace it with another command, +or to continue without it. + +! Missing $ inserted. + + $ +l.12 \end{C11synopsis} + +I've inserted something that you may have forgotten. +(See the above.) +With luck, this will get me unwedged. 
But if you +really didn't forget anything, try typing `2' now; then +my insertion and my current dilemma will both disappear. + +Missing character: There is no w in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.13 where \TYPE + {} is one of the standard \ac{AMO} types specified by +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no d in font nullfont! +! Undefined control sequence. +l.13 where \TYPE{} is one of the standard \ac + {AMO} types specified by +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). 
Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no A in font nullfont! +Missing character: There is no M in font nullfont! +Missing character: There is no O in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. + ...ference `stdamotypes' on page \thepage + \space undefined\on@line . +l.14 Table~\ref{stdamotypes} + . +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + + +LaTeX Warning: Reference `stdamotypes' on page undefined on input line 14. + +Missing character: There is no . in font nullfont! 
+ +Overfull \hbox (20.0pt too wide) in paragraph at lines 10--15 +[] + [] + + +Overfull \hbox (71.04713pt too wide) in paragraph at lines 10--15 +[][][][]$[]\OML/cmm/m/it/10 tx[]ctx; TYPE \OMS/cmsy/m/n/10  + [] + + +Overfull \hbox (64.801pt too wide) in paragraph at lines 10--15 +\OML/cmm/m/it/10 fetch; TYPE \OMS/cmsy/m/n/10  + [] + + +Overfull \hbox (167.39667pt too wide) in paragraph at lines 10--15 +\OML/cmm/m/it/10 dest; TYPEcond; TYPEvalue; intpe\OT1/cmr/m/n/10 );$ + [] + + +Overfull \hbox (10.86105pt too wide) in paragraph at lines 10--15 +[] + [] + + +! LaTeX Error: Environment Csynopsis undefined. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.16 \begin{Csynopsis} + +Your command was ignored. +Type I to replace it with another command, +or to continue without it. + + +! LaTeX Error: Missing \begin{document}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.17 v + oid @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_compare\_swap\_nbi... + +You're in trouble here. Try typing to proceed. +If that doesn't work, type X to quit. + +Missing character: There is no v in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no @ in font nullfont! +! Undefined control sequence. +l.17 void @\FuncDecl + {shmem\_\FuncParam{TYPENAME}\_atomic\_compare\_swap\_nbi... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no s in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! 
+Missing character: There is no m in font nullfont! +! Undefined control sequence. +l.17 void @\FuncDecl{shmem\_\FuncParam + {TYPENAME}\_atomic\_compare\_swap\_nbi... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no N in font nullfont! +Missing character: There is no A in font nullfont! +Missing character: There is no M in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no @ in font nullfont! +Missing character: There is no ( in font nullfont! 
+Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no * in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no * in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no v in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no u in font nullfont! 
+Missing character: There is no e in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no ) in font nullfont! +Missing character: There is no ; in font nullfont! +Missing character: There is no v in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no @ in font nullfont! +! Undefined control sequence. +l.18 void @\FuncDecl + {shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_compare\_swap... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no s in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no x in font nullfont! +! Undefined control sequence. +l.18 void @\FuncDecl{shmem\_ctx\_\FuncParam + {TYPENAME}\_atomic\_compare\_swap... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no T in font nullfont! 
+Missing character: There is no Y in font nullfont! +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no N in font nullfont! +Missing character: There is no A in font nullfont! +Missing character: There is no M in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no @ in font nullfont! +Missing character: There is no ( in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +! Missing $ inserted. + + $ +l.18 ...NAME}\_atomic\_compare\_swap\_nbi}@(shmem_ + ctx_t ctx, TYPE *fetch, TY... +I've inserted a begin-math/end-math symbol since I think +you left one out. Proceed, with fingers crossed. + + +! 
LaTeX Error: \begin{document} ended by \end{Csynopsis}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.19 \end{Csynopsis} + +Your command was ignored. +Type I to replace it with another command, +or to continue without it. + +! Missing $ inserted. + + $ +l.19 \end{Csynopsis} + +I've inserted something that you may have forgotten. +(See the above.) +With luck, this will get me unwedged. But if you +really didn't forget anything, try typing `2' now; then +my insertion and my current dilemma will both disappear. + +Missing character: There is no w in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.20 where \TYPE + {} is one of the standard \ac{AMO} types and has a correspon... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! 
+Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no d in font nullfont! +! Undefined control sequence. +l.20 where \TYPE{} is one of the standard \ac + {AMO} types and has a correspon... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no A in font nullfont! +Missing character: There is no M in font nullfont! +Missing character: There is no O in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no g in font nullfont! +! 
Undefined control sequence. +l.21 \TYPENAME + {} specified by Table~\ref{stdamotypes}. +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no s in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. + ...ference `stdamotypes' on page \thepage + \space undefined\on@line . +l.21 ...AME{} specified by Table~\ref{stdamotypes} + . +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + + +LaTeX Warning: Reference `stdamotypes' on page undefined on input line 21. + +Missing character: There is no . in font nullfont! 
+ +Overfull \hbox (20.0pt too wide) in paragraph at lines 17--22 +[] + [] + + +Overfull \hbox (71.04713pt too wide) in paragraph at lines 17--22 +[][][][][][]$[]\OML/cmm/m/it/10 tx[]ctx; TYPE \OMS/cmsy/m/n/10  + [] + + +Overfull \hbox (64.801pt too wide) in paragraph at lines 17--22 +\OML/cmm/m/it/10 fetch; TYPE \OMS/cmsy/m/n/10  + [] + + +Overfull \hbox (167.39667pt too wide) in paragraph at lines 17--22 +\OML/cmm/m/it/10 dest; TYPEcond; TYPEvalue; intpe\OT1/cmr/m/n/10 );$ + [] + + +Overfull \hbox (10.86105pt too wide) in paragraph at lines 17--22 +[] + [] + + +! LaTeX Error: Environment apiarguments undefined. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.23 \begin{apiarguments} + +Your command was ignored. +Type I to replace it with another command, +or to continue without it. + +! Undefined control sequence. +l.24 \apiargument + {IN}{ctx}{A context handle specifying the context on wh... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + + +! LaTeX Error: Missing \begin{document}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.24 \apiargument{I + N}{ctx}{A context handle specifying the context on wh... + +You're in trouble here. Try typing to proceed. +If that doesn't work, type X to quit. + +Missing character: There is no I in font nullfont! +Missing character: There is no N in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no x in font nullfont! +Missing character: There is no A in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! 
+Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no x in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no g in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no x in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no t in font nullfont! 
+Missing character: There is no o in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no . in font nullfont! +Missing character: There is no W in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no g in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! 
+Missing character: There is no s in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no v in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! 
+Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no x in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no . in font nullfont! +! Undefined control sequence. +l.27 \apiargument + {OUT}{fetch}{Local data object to be updated.} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no O in font nullfont! +Missing character: There is no U in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no L in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! 
+Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no j in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no . in font nullfont! +! Undefined control sequence. +l.28 \apiargument + {OUT}{dest}{The remotely accessible data object to be u... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no O in font nullfont! +Missing character: There is no U in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no h in font nullfont! 
+Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no j in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! 
+Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.29 the remote \ac + {PE}. } +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no . in font nullfont! +! Undefined control sequence. +l.30 \apiargument + {IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no I in font nullfont! +Missing character: There is no N in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +! Undefined control sequence. 
+l.30 \apiargument{IN}{cond}{\VAR + {cond} is compared to the remote \VAR{dest} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.30 ...{\VAR{cond} is compared to the remote \VAR + {dest} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. 
+ +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no v in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no . in font nullfont! +Missing character: There is no I in font nullfont! +Missing character: There is no f in font nullfont! +! Undefined control sequence. +l.31 value. If \VAR + {cond} and the remote \VAR{dest} are equal, then ... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.31 ... value. If \VAR{cond} and the remote \VAR + {dest} are equal, then \VA... 
+The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no q in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +! Undefined control sequence. +l.31 ...the remote \VAR{dest} are equal, then \VAR + {value} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no v in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no w in font nullfont! 
+Missing character: There is no a in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.32 is swapped into the remote \VAR + {dest}; otherwise, the remote \V... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no ; in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! 
+Missing character: There is no e in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.32 ...ote \VAR{dest}; otherwise, the remote \VAR + {dest} is +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no g in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no . in font nullfont! +! Undefined control sequence. +l.33 unchanged. \VAR + {cond} must be of the same data type as \VAR{des... +The control sequence at the end of the top line +of your error message was never \def'ed. 
If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no s in font nullfont! +! Undefined control sequence. +l.33 ...ond} must be of the same data type as \VAR + {dest}.} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). 
Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no . in font nullfont! +! Undefined control sequence. +l.34 \apiargument + {IN}{value}{The value to be atomically written to the r... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no I in font nullfont! +Missing character: There is no N in font nullfont! +Missing character: There is no v in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no v in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no i in font nullfont! 
+Missing character: There is no c in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.35 \ac + {PE}. \VAR{value} must be the same data type as \VAR{dest}.} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no . in font nullfont! +! Undefined control sequence. +l.35 \ac{PE}. \VAR + {value} must be the same data type as \VAR{dest}.} +The control sequence at the end of the top line +of your error message was never \def'ed. 
If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no v in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no s in font nullfont! +! Undefined control sequence. +l.35 ...{value} must be the same data type as \VAR + {dest}.} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. 
+ +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no . in font nullfont! +! Undefined control sequence. +l.36 \apiargument + {IN}{pe}{An integer that indicates the \ac{PE} number u... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no I in font nullfont! +Missing character: There is no N in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no A in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no g in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! 
+Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.36 ...{IN}{pe}{An integer that indicates the \ac + {PE} number upon which +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +! Undefined control sequence. +l.37 \VAR + {dest} is to be updated.} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! 
+Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no . in font nullfont! + +! LaTeX Error: \begin{document} ended by \end{apiarguments}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.38 \end{apiarguments} + +Your command was ignored. +Type I to replace it with another command, +or to continue without it. + + +Overfull \hbox (20.0pt too wide) in paragraph at lines 24--39 +[] + [] + +! Undefined control sequence. +l.40 \apidescription + { +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + + +! LaTeX Error: Missing \begin{document}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.41 T + he nonblocking conditional swap routines conditionally update \VAR... + +You're in trouble here. Try typing to proceed. +If that doesn't work, type X to quit. + +Missing character: There is no T in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no o in font nullfont! 
+Missing character: There is no n in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no k in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no g in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no w in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no t in font nullfont! 
+Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.41 ...al swap routines conditionally update \VAR + {dest} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no j in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! 
+Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +! Undefined control sequence. +l.42 data object on the specified \ac + {PE} and fetches the prior contents... +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no c in font nullfont! 
+Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no j in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.43 data object into the \VAR + {fetch} local data object as a +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no f in font nullfont! +Missing character: There is no e in font nullfont! 
+Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no j in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no g in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no a in font nullfont! 
+Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no . in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no g in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! 
+Missing character: There is no o in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no . in font nullfont! +Missing character: There is no T in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no p in font nullfont! 
+Missing character: There is no l in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no q in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +! Undefined control sequence. +l.46 \FUNC + {shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, prior +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no s in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! 
+Missing character: There is no q in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no . in font nullfont! +Missing character: There is no A in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no f in font nullfont! +! Undefined control sequence. +l.46 ...{shmem\_quiet}. At the completion of \FUNC + {shmem\_quiet}, prior +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no s in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no q in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no i in font nullfont! 
+Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no , in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.47 contents of the \VAR + {dest} data object is atomically fetched into +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! 
+Missing character: There is no j in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no f in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +! Undefined control sequence. +l.48 \VAR + {fetch} local data object and the contents of \VAR{value} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no f in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no c in font nullfont! 
+Missing character: There is no h in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no b in font nullfont! +Missing character: There is no j in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no f in font nullfont! +! Undefined control sequence. +l.48 ...local data object and the contents of \VAR + {value} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). 
Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no v in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no y in font nullfont! +Missing character: There is no u in font nullfont! +Missing character: There is no p in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no d in font nullfont! +Missing character: There is no i in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +! Undefined control sequence. +l.49 is conditionally updated into \VAR + {dest} on to the remote \ac{PE}. +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). 
Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no d in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no s in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no h in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no r in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no m in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no t in font nullfont! +Missing character: There is no e in font nullfont! +! Undefined control sequence. +l.49 ...dated into \VAR{dest} on to the remote \ac + {PE}. +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + +Missing character: There is no P in font nullfont! +Missing character: There is no E in font nullfont! +Missing character: There is no . in font nullfont! + +Overfull \hbox (20.0pt too wide) in paragraph at lines 41--51 +[] + [] + +! Undefined control sequence. +l.52 \apireturnvalues + { +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + + +! LaTeX Error: Missing \begin{document}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. 
+ ... + +l.53 N + one. +You're in trouble here. Try typing to proceed. +If that doesn't work, type X to quit. + +Missing character: There is no N in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no . in font nullfont! + +Overfull \hbox (20.0pt too wide) in paragraph at lines 53--55 +[] + [] + +! Undefined control sequence. +l.56 \apinotes + { +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + + +! LaTeX Error: Missing \begin{document}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.57 N + one. +You're in trouble here. Try typing to proceed. +If that doesn't work, type X to quit. + +Missing character: There is no N in font nullfont! +Missing character: There is no o in font nullfont! +Missing character: There is no n in font nullfont! +Missing character: There is no e in font nullfont! +Missing character: There is no . in font nullfont! + +Overfull \hbox (20.0pt too wide) in paragraph at lines 57--59 +[] + [] + + +! LaTeX Error: \begin{document} ended by \end{apidefinition}. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.60 \end{apidefinition} + +Your command was ignored. +Type I to replace it with another command, +or to continue without it. + +! Undefined control sequence. +l.61 \color + {Black} +The control sequence at the end of the top line +of your error message was never \def'ed. If you have +misspelled it (e.g., `\hobx'), type `I' and the correct +spelling (e.g., `I\hbox'). Otherwise just continue, +and I'll forget about whatever was undefined. + + +! LaTeX Error: Missing \begin{document}. 
+ +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... + +l.61 \color{B + lack} +You're in trouble here. Try typing to proceed. +If that doesn't work, type X to quit. + +Missing character: There is no B in font nullfont! +Missing character: There is no l in font nullfont! +Missing character: There is no a in font nullfont! +Missing character: There is no c in font nullfont! +Missing character: There is no k in font nullfont! +) +! Emergency stop. +<*> ...i/content/shmem_atomic_compare_swap_nbi.tex + +*** (job aborted, no legal \end found) + + +Here is how much of TeX's memory you used: + 22 strings out of 493634 + 624 string characters out of 3135239 + 53604 words of memory out of 3000000 + 3505 multiletter control sequences out of 15000+200000 + 3948 words of font info for 15 fonts, out of 3000000 for 9000 + 1025 hyphenation exceptions out of 8191 + 18i,2n,14p,265b,95s stack positions out of 5000i,500n,10000p,200000b,50000s +! ==> Fatal error occurred, no output PDF file produced! diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex index 6e1cc18dc..6c0a7da28 100644 --- a/content/shmem_atomic_compare_swap_nbi.tex +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -21,27 +21,24 @@ \TYPENAME{} specified by Table~\ref{stdamotypes}. \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. When this argument is not provided, the + operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} - \apiargument{OUT}{dest}{The remotely accessible integer data object to be - updated on the remote \ac{PE}. } + \apiargument{OUT}{dest}{The remotely accessible data object to be updated on + the remote \ac{PE}. 
} \apiargument{IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} value. If \VAR{cond} and the remote \VAR{dest} are equal, then \VAR{value} is swapped into the remote \VAR{dest}; otherwise, the remote \VAR{dest} is - unchanged. In either case, the old value of the remote \VAR{dest} is - returned as the routine return value. \VAR{cond} must be of the same data - type as \VAR{dest}.} + unchanged. \VAR{cond} must be of the same data type as \VAR{dest}.} \apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}. \VAR{value} must be the same data type as \VAR{dest}.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which - \VAR{dest} is to be updated. When using \Fortran, it must be a default - integer value.} + \VAR{dest} is to be updated.} \end{apiarguments} \apidescription{ - The nonblocking conditional swap routines conditionally update a \VAR{dest} + The nonblocking conditional swap routines conditionally update \VAR{dest} data object on the specified \ac{PE} and fetches the prior contents of the data object into the \VAR{fetch} local data object as a single atomic operation. This routine returns after posting the operation. diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index 55c266126..863c690f6 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -22,18 +22,17 @@ \begin{apiarguments} -\apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} +\apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. 
When this argument is not provided, the operation + is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} -\apiargument{OUT}{dest}{The remotely accessible integer data object to be updated on +\apiargument{OUT}{dest}{The remotely accessible data object to be updated on the remote \ac{PE}. The type of \VAR{dest} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{value}{The value to be atomically added to \VAR{dest}. The - type of \VAR{value} should match that implied in the SYNOPSIS section.} +\apiargument{IN}{value}{The value to be atomically added to \VAR{dest}. + \VAR{value} is the same type as \dest.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \VAR{dest} is to be updated. When using \Fortran, it must be a default - integer value.} + \VAR{dest} is to be updated.} \end{apiarguments} diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index b1893fb63..b5bfe25b1 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -22,13 +22,15 @@ \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. When this argument is not provided, the + operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to - be updated.} - \apiargument{IN}{value}{The operand to the bitwise AND operation.} + be updated. The type of \VAR{dest} should match that implied in the + SYNOPSIS section.} + \apiargument{IN}{value}{The operand to the bitwise AND operation. 
\VAR{value} + is the same type as \dest.} \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} is to be updated.} diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index 40a1415e3..aa897c5b1 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -22,16 +22,15 @@ \begin{apiarguments} -\apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} +\apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. When this argument is not provided, the + operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} -\apiargument{OUT}{dest}{The remotely accessible integer data object to be updated +\apiargument{OUT}{dest}{The remotely accessible data object to be updated on the remote \ac{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \dest{} is to be updated. When using \Fortran, it must be a default - integer value.} + \dest{} is to be updated.} \end{apiarguments} diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex index 6ef45319b..98e5f0651 100644 --- a/content/shmem_atomic_fetch_nbi.tex +++ b/content/shmem_atomic_fetch_nbi.tex @@ -22,9 +22,9 @@ \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. 
When this argument is not provided, the + operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{IN}{source}{The remotely accessible data object to be fetched from the remote \ac{PE}.} @@ -38,7 +38,7 @@ operation. This routine returns after posting the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, contents of the \source{} data object - is from \ac{PE} is atomically fetched into \VAR{fetch} local data object. + from the remote \ac{PE} are atomically fetched into the \VAR{fetch} local data object. } \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index 4f4a44997..a552434f3 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -22,13 +22,15 @@ \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. When this argument is not provided, the + operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to - be updated.} - \apiargument{IN}{value}{The operand to the bitwise OR operation.} + be updated. The type of \VAR{dest} should match that implied in the + SYNOPSIS section.} + \apiargument{IN}{value}{The operand to the bitwise OR operation.
\VAR{value} + is the same type as \dest.} \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} is to be updated.} diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index ed5ac57e2..6aee85e98 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -22,13 +22,15 @@ \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. When this argument is not provided, the + operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to - be updated.} - \apiargument{IN}{value}{The operand to the bitwise XOR operation.} + be updated. The type of \VAR{dest} should match that implied in the + SYNOPSIS section.} + \apiargument{IN}{value}{The operand to the bitwise XOR operation. \VAR{value} + is the same type as \dest.} \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} is to be updated.} diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex index 5cfcaac41..08859d047 100644 --- a/content/shmem_atomic_swap_nbi.tex +++ b/content/shmem_atomic_swap_nbi.tex @@ -18,18 +18,17 @@ where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{extamotypes}. \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. 
When this argument is not provided, the + operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} - \apiargument{OUT}{dest}{The remotely accessible integer data object to be - updated on the remote \ac{PE}. When using \CorCpp, the type of + \apiargument{OUT}{dest}{The remotely accessible data object to be + updated on the remote \ac{PE}. When using \CorCpp, the type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The value to be atomically written to the remote - \ac{PE}. \VAR{value} is the same type as \dest.} + \ac{PE}. \VAR{value} is the same type as \dest.} \apiargument{IN}{pe}{ An integer that indicates the \ac{PE} number on which - \dest{} is to be updated. When using \Fortran, it must be a default - integer value.} + \dest{} is to be updated.} \end{apiarguments} \apidescription{ From 293fbf7d2b1039e87cd063514964c1a66752b97b Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 6 Dec 2018 13:21:23 -0600 Subject: [PATCH 078/319] RM unnecessary SYNOPSIS and datatype ref in NB AMO --- content/shmem_atomic_compare_swap_nbi.tex | 4 ++-- content/shmem_atomic_fetch_add_nbi.tex | 6 ++---- content/shmem_atomic_fetch_and_nbi.tex | 6 ++---- content/shmem_atomic_fetch_inc_nbi.tex | 5 ++--- content/shmem_atomic_fetch_or_nbi.tex | 6 ++---- content/shmem_atomic_fetch_xor_nbi.tex | 6 ++---- content/shmem_atomic_swap_nbi.tex | 5 ++--- 7 files changed, 14 insertions(+), 24 deletions(-) diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex index 6c0a7da28..c7569429d 100644 --- a/content/shmem_atomic_compare_swap_nbi.tex +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -30,9 +30,9 @@ \apiargument{IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} value. If \VAR{cond} and the remote \VAR{dest} are equal, then \VAR{value} is swapped into the remote \VAR{dest}; otherwise, the remote \VAR{dest} is - unchanged. 
\VAR{cond} must be of the same data type as \VAR{dest}.} + unchanged.} \apiargument{IN}{value}{The value to be atomically written to the remote - \ac{PE}. \VAR{value} must be the same data type as \VAR{dest}.} + \ac{PE}. } \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which \VAR{dest} is to be updated.} \end{apiarguments} diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index 863c690f6..7c43ec024 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -27,10 +27,8 @@ is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{The remotely accessible data object to be updated on - the remote \ac{PE}. The type of \VAR{dest} should match that implied in the - SYNOPSIS section.} -\apiargument{IN}{value}{The value to be atomically added to \VAR{dest}. - \VAR{value} is the same type as \dest.} + the remote \ac{PE}.} +\apiargument{IN}{value}{The value to be atomically added to \VAR{dest}.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which \VAR{dest} is to be updated.} diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index b5bfe25b1..eda95a3a0 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -27,10 +27,8 @@ operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to - be updated. The type of \VAR{dest} should match that implied in the - SYNOPSIS section.} - \apiargument{IN}{value}{The operand to the bitwise AND operation. 
\VAR{value} - is the same type as \dest.} + be updated.} + \apiargument{IN}{value}{The operand to the bitwise AND operation.} \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} is to be updated.} diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index aa897c5b1..471a4c4d5 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -27,8 +27,7 @@ operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{The remotely accessible data object to be updated - on the remote \ac{PE}. The type of \dest{} should match that implied in the - SYNOPSIS section.} + on the remote \ac{PE}.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to be updated.} @@ -36,7 +35,7 @@ \apidescription{ - These nonblocking routines perform a atomic fetch-and-increment operation. + These nonblocking routines perform an atomic fetch-and-increment operation. This routine returns after posting the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the \dest{} on \ac{PE} \VAR{pe} is increased by one and diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index a552434f3..52617bbc5 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -27,10 +27,8 @@ operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to - be updated. The type of \VAR{dest} should match that implied in the - SYNOPSIS section.} - \apiargument{IN}{value}{The operand to the bitwise OR operation. 
\VAR{value} - is the same type as \dest.} + be updated.} + \apiargument{IN}{value}{The operand to the bitwise OR operation.} \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} is to be updated.} diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index 6aee85e98..089b58113 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -27,10 +27,8 @@ operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to - be updated. The type of \VAR{dest} should match that implied in the - SYNOPSIS section.} - \apiargument{IN}{value}{The operand to the bitwise XOR operation. \VAR{value} - is the same type as \dest.} + be updated.} + \apiargument{IN}{value}{The operand to the bitwise XOR operation.} \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} is to be updated.} diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex index 08859d047..d45c2aca7 100644 --- a/content/shmem_atomic_swap_nbi.tex +++ b/content/shmem_atomic_swap_nbi.tex @@ -23,10 +23,9 @@ operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} \apiargument{OUT}{dest}{The remotely accessible data object to be - updated on the remote \ac{PE}. When using \CorCpp, the type of - \dest{} should match that implied in the SYNOPSIS section.} + updated on the remote \ac{PE}.} \apiargument{IN}{value}{The value to be atomically written to the remote - \ac{PE}. 
\VAR{value} is the same type as \dest.} + \ac{PE}.} \apiargument{IN}{pe}{ An integer that indicates the \ac{PE} number on which \dest{} is to be updated.} \end{apiarguments} From b9ab777b7faa2f6ad45393f04cb33f10c27a3a79 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 11 Dec 2018 13:18:42 -0600 Subject: [PATCH 079/319] Update team config to indicate contexts must be destroyed before team --- content/shmem_team_config_t.tex | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex index 440512bbc..0c4bb8211 100644 --- a/content/shmem_team_config_t.tex +++ b/content/shmem_team_config_t.tex @@ -45,7 +45,9 @@ may be created in any number of threads. A program may destroy any number of contexts made from this team and make any number of new ones so long as the total existing at any point - remains less than \VAR{num\_contexts}. + does not exceed \VAR{num\_contexts}. Any contexts created from this + team must be destroyed before the team is destroyed, or the + behavior is undefined. See Section~\ref{sec:ctx} for more on communication contexts and Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. From 69ab5733d78576988b7c93aade169679e883e8dd Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 11 Dec 2018 13:20:03 -0600 Subject: [PATCH 080/319] Update team create ctx to indicate contexts must be destroyed before team --- content/shmem_team_create_ctx.tex | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/content/shmem_team_create_ctx.tex b/content/shmem_team_create_ctx.tex index 99f9dec53..908dcc9e4 100644 --- a/content/shmem_team_create_ctx.tex +++ b/content/shmem_team_create_ctx.tex @@ -22,19 +22,27 @@ context and returns its handle through the \VAR{ctx} argument. This context is created from the team specified by the \VAR{team} argument.
- The \FUNC{shmem\_team\_create\_ctx} routine must be called by no more threads - than were specified by the \VAR{num\_threads} member of the + In addition to the team, the \FUNC{shmem\_team\_create\_ctx} routine accepts + the same arguments and provides all the same return conditions as the + \FUNC{shmem\_ctx\_create} routine. + + The \FUNC{shmem\_team\_create\_ctx} routine may be called any number of times, + but the total number of simultaneously existing contexts created from a team + must be no more than were specified by the \VAR{num\_contexts} member of the \CTYPE{shmem\_team\_config\_t} configuration parameters that were specified - when the team was created. + when the team was created. Calling \FUNC{shmem\_team\_create\_ctx} on a + team for which the maximum number of contexts currently exists results in a + failure with nonzero return code. + + All explicitly created resources associated with a team must be destroyed + before the \FUNC{shmem\_team\_destroy} routine is called. If a context + returned from \FUNC{shmem\_team\_create\_ctx} is not explicitly + destroyed before the team is destroyed, behavior is undefined. %% All \openshmem routines that operate on this context will do so with %% respect to the associated \ac{PE} team. %% That is, all point-to-point routines operating on this context will use %% team-relative \ac{PE} numbering. - - In addition to the team, the \FUNC{shmem\_team\_create\_ctx} routine accepts - the same arguments and provides all the same return conditions as the - \FUNC{shmem\_ctx\_create} routine. } \apireturnvalues{ @@ -42,7 +50,7 @@ } \apinotes{ - None. 
+ Any contexts created from a team } \end{apidefinition} From 7c14e5830e67ffdd4689a5bc2148c014187e4b68 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 11 Dec 2018 13:22:08 -0600 Subject: [PATCH 081/319] Update team destroy to indicate team resources must be destroyed --- content/shmem_team_destroy.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index e942d133d..7177bb63b 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -29,6 +29,13 @@ After returning from the routine, if the team was successfully destroyed, the handle will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. + +Team destruction assumes that any resources explicitly created from the team, +such as contexts created from the team, have already been released through +the appropriate function, such as destroying the context. If there are any +objects or resources explicitly created from the team that have not been +explicitly released before \FUNC{shmem\_team\_destroy} is called, behavior is +undefined. } \apireturnvalues{ From bec94608a70181f07f8bdea596c9277cb2ea6f44 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 11 Dec 2018 13:24:02 -0600 Subject: [PATCH 082/319] Fix typo --- content/shmem_team_create_ctx.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_create_ctx.tex b/content/shmem_team_create_ctx.tex index 908dcc9e4..e068440e8 100644 --- a/content/shmem_team_create_ctx.tex +++ b/content/shmem_team_create_ctx.tex @@ -50,7 +50,7 @@ } \apinotes{ - Any contexts created from a team + None. 
} \end{apidefinition} From ebb66f2855f263ebe3eda9dd8daeda8a9538afdb Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 11 Dec 2018 14:44:03 -0600 Subject: [PATCH 083/319] Update error handling section to clarify and add better guidance --- content/error_handling.tex | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/content/error_handling.tex b/content/error_handling.tex index b5d5e3dd1..d2bd93317 100644 --- a/content/error_handling.tex +++ b/content/error_handling.tex @@ -1,10 +1,19 @@ -In many cases, \openshmem routines will guarantee the correct completion of operations without any need for programs to check for error states, diagnose system problems, or retry operations. For example, there are no error codes returned for remote memory operations. The implementation is expected to internally attempt any feasible checking and recovery to best guarantee completion as specified. However, there are also cases where routines return error codes to allow programs to detect problems that may be correctable at the application layer, e.g. invalid arguments to routines or requests for system resources that cannot be fulfilled at runtime. +In many cases, \openshmem routines will guarantee the correct completion of operations without any need for programs to check for error states, diagnose system problems, or retry operations. For example, there are no error codes returned for remote memory operations. The implementation is expected to internally attempt any feasible checking and recovery to best guarantee completion as specified. However, there are also cases where routines return error codes to allow programs to detect problems that may be correctable at the application layer, e.g. requests for system resources that cannot be fulfilled at runtime. -\CorCpp routines that return an integer error code follow the convention that \CONST{0} indicates successful local completion of the operation. 
This is considered a best effort of the implementation to indicate that all required local operations have been performed correctly inside the routine and the internal \openshmem state on the calling \ac{PE} is consistent with the description of the routine and its arguments upon completion. Implementations may use an integer return value from a routine to define other integer error codes specific to the implementation as long as those codes are not already explicitly defined for that routine. +\CorCpp routines that return an integer error code follow the convention that \CONST{0} indicates successful local completion of the operation. This is considered a best effort of the implementation to indicate that all required local operations have been performed correctly inside the routine and the internal \openshmem state on the calling \ac{PE} is consistent with the description of the routine and its arguments upon completion. Implementations may use an integer return value from a routine to define integer error codes specific to the implementation as long as those codes are not already explicitly defined for that routine. -Because \openshmem defines asynchronous communication operations, errors may not arise until sometime after the error-generating routine has returned control to the calling program. In these cases, the implementation might abort the application with an informative message or take other appropriate actions. So, a return value indicating success in a routine cannot be considered a guarantee that the routine will complete all future actions successfully. +Because \openshmem defines asynchronous communication operations, errors may arise at any time as communications proceed. In these cases, the implementation might generate error messages or abort the application when errors occur. The \openshmem specification cannot define these types of errors, and leaves it to the implementation to determine how these types of errors should be handled. 
Collective operations involving many \acp{PE} may return values indicating success while other \acp{PE} are still executing the collective operation. So, return values indicating success of a collective routine on one \ac{PE} do not by default indicate that all \acp{PE} involved in the collective operation will return from the routine successfully. If a collective routine provides any such global error checking behavior, it will be explicitly stated in the description of that routine. -\openshmem implementations for high performance production environments may do minimal internal error checking for performance reasons. Also, some errors may be undetectable or uncorrectable due to limitations in underlying technologies, resulting in undefined behaviors. For these reasons, \openshmem does not provide strict error checking guarantees in most cases. For \CorCpp routines returning integer error codes, implementations are expected to detect certain error conditions as explicitly defined in individual routines, then make best efforts to detect other problems in routine completion and return implementation specific nonzero error codes where feasible. +\openshmem implementations for production environments may do minimal internal error checking for performance reasons. Also, some errors may be undetectable or uncorrectable due to limitations in underlying technologies, resulting in undefined behaviors. For these reasons, \openshmem does not provide strict error checking guarantees in most cases. For \CorCpp routines returning integer error codes, implementations are guided, but not required, to do the following: + +\begin{itemize} +\item Return nonzero error codes and avoid application abort behaviors for resource allocation failure. 
+\item[] Examples of resource allocation failure are: context creation failure due to lack of network resources, team context creation failure due to lack of team resources, team creation failure due to lack of network or symmetric memory resources. +\item Return nonzero error codes when encountering errors that are likely transient and are not likely to result in the application or library becoming inconsistent. +\item Abort the application with an informative message if allowing it to continue would likely result in the application or library state becoming inconsistent. +\end{itemize} + + From a237b9769aabf419f359aa195ab85e517760cc15 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 12 Dec 2018 11:40:25 -0600 Subject: [PATCH 084/319] Updated team config section for hints --- content/shmem_team_config_t.tex | 75 ++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex index 440512bbc..b87c02802 100644 --- a/content/shmem_team_config_t.tex +++ b/content/shmem_team_config_t.tex @@ -4,10 +4,9 @@ \begin{apidefinition} - \begin{Csynopsis} +\begin{Csynopsis} typedef struct { - int disable_collectives; - int return_local_limit; + int no_collectives; int num_contexts; } shmem_team_config_t; \end{Csynopsis} @@ -18,28 +17,23 @@ A team configuration argument acts as both input and output to the \FUNC{shmem\_team\_split\_*} routines. As an input, it specifies the requested capabilities of the team to be - created. - As an output, the configuration argument is conditionally updated on - whether team creation is successful. - If successful, the configuration argument is not modified; - if unsuccessful, it is updated to specify the limiting configuration - parameter(s). + created. Capabilities can be requested as either hints or requirements. 
- The \VAR{disable\_collectives} member allows for teams to be created + If a given configuration parameter input is a requirement, and the team creation + cannot provide this capability, then team creation fails. + If a given configuration parameter input is a hint, and the library + succeeds in creating the team, the parameter will be updated to + the actual configuration that the library was able to provide + during team creation. + + The \VAR{no\_collectives} member allows for teams to be created without support for collective communications, which allows implementations to reduce team creation overheads for those teams. - When its value is zero, it specifies that the team should have collectives - enabled. + When its value is zero, it specifies that the team should have collective + operations enabled. When nonzero, the team will not support collective operations, which allows implementations to reduce team creation overheads. - The \VAR{return\_local\_limit} member controls whether, after a failed - team creation, the team configuration argument is updated with the - locally restrictive parameter(s) or the most restrictive parameter(s) - across the \acp{PE} of the new team. - When its value is zero, the most restrictive parameters are returned; - otherwise, the locally restrictive parameters are returned. - The \VAR{num\_contexts} member specifies the total number of contexts created from this team that can simultaneously exist. These contexts may be created in any number of threads. A program @@ -50,42 +44,49 @@ Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. When using the configuration structure to create teams, a mask parameter - controls which fields to use and which to ignore. So, a program does - not have to set all fields in the config struct; only those for which - it does not want the default values. + controls which fields to use and whether they are hints or requirements. 
+ Any configuration parameter that is not indicated in the mask will be + ignored. + So, a program does not have to set all fields in the config struct; + only those for which it does not want the default values. A configuration mask value is created by combining individual field masks with through a bitwise OR operation of the following library constants: { - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE}}{ + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NO\_COLLECTIVES}}{ The team should be created using the value of the - \VAR{disable\_collectives} member of the configuration parameter - \VAR{config}. + \VAR{no\_collectives} member of the configuration parameter + \VAR{config} as a requirement. } - \apitablerow{\LibConstRef{SHMEM\_TEAM\_LOCAL\_LIMIT}}{ + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NO\_COLLECTIVES\_HINT}}{ The team should be created using the value of the - \VAR{return\_local\_limit} member of the configuration parameter - \VAR{config}. + \VAR{no\_collectives} member of the configuration parameter + \VAR{config} as a hint. } \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_CONTEXTS}}{ The team should be created using the value of the - \VAR{num\_contexts} member of the configuration parameter \VAR{config}. + \VAR{num\_contexts} member of the configuration parameter + \VAR{config} as a requirement. } + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_CONTEXTS\_HINT}}{ + The team should be created using the value of the + \VAR{num\_contexts} member of the configuration parameter + \VAR{config} as a hint. + } } + If a program creates a mask using both the requirement and hint flag + for a given parameter, the behavior is undefined. 
+ A configuration mask value of \CONST{0} indicates that the team should be created with the default values for all configuration parameters, as follows: { - \apitablerow{disable\_collectives = \CONST{0}}{ + \apitablerow{no\_collectives = \CONST{0}}{ By default, teams support collective operations } - \apitablerow{return\_local\_limit = \CONST{0}}{ - By default, when team creation fails, the configuration structure returns the most restrictive - parameter value across all \acp{PE} in the new team - } \apitablerow{num\_contexts = \CONST{0}}{ By default, no contexts can be created on a new team } @@ -94,7 +95,11 @@ } \apinotes{ - None. + The library can set a team configuration to indicate that collective operations are + disabled without providing some optimized version of teams without support for + collective operations. Disabling collectives is a portable way to + provide optimizations on some implementations, not a promise that teams with + this option will reduce overheads on all implementations. } \end{apidefinition} From 5d802296e59a6008c5b3143f41e06a86584aa522 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 12 Dec 2018 11:40:52 -0600 Subject: [PATCH 085/319] Updated library constant section for team config hints --- content/library_constants.tex | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index f68ea2c51..e38f94622 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -40,21 +40,21 @@ \tabularnewline \hline %% \color{Green} -\LibConstDecl{SHMEM\_TEAM\_NOCOLLECTIVE} & +\LibConstDecl{SHMEM\_TEAM\_NO\_COLLECTIVES} & \color{Green} The bitwise flag which specifies that a team creation routine should use the -\VAR{disable\_collectives} member of the provided -\CTYPE{shmem\_team\_config\_t} configuration parameter. 
+\VAR{no\_collectives} member of the provided +\CTYPE{shmem\_team\_config\_t} configuration parameter as a requirement. See Sections~\ref{subsec:shmem_team_config_t} and \ref{subsec:shmem_team_split_strided} for more detail about its use. \tabularnewline \hline %% \color{Green} -\LibConstDecl{SHMEM\_TEAM\_LOCAL\_LIMIT} & +\LibConstDecl{SHMEM\_TEAM\_NO\_COLLECTIVES\_HINT} & \color{Green} The bitwise flag which specifies that a team creation routine should use the -\VAR{return\_local\_limit} member of the provided -\CTYPE{shmem\_team\_config\_t} configuration parameter. +\VAR{no\_collectives} member of the provided +\CTYPE{shmem\_team\_config\_t} configuration parameter as a hint. See Sections~\ref{subsec:shmem_team_config_t} and \ref{subsec:shmem_team_split_strided} for more detail about its use. \tabularnewline \hline @@ -64,7 +64,17 @@ \color{Green} The bitwise flag which specifies that a team creation routine should use the \VAR{num\_contexts} member of the provided -\CTYPE{shmem\_team\_config\_t} configuration parameter. +\CTYPE{shmem\_team\_config\_t} configuration parameter as a requirement. +See Sections~\ref{subsec:shmem_team_config_t} and +\ref{subsec:shmem_team_split_strided} for more detail about its use. +\tabularnewline \hline +%% +\color{Green} +\LibConstDecl{SHMEM\_TEAM\_NUM\_CONTEXTS\_HINT} & +\color{Green} +The bitwise flag which specifies that a team creation routine should use the +\VAR{num\_contexts} member of the provided +\CTYPE{shmem\_team\_config\_t} configuration parameter as a hint. See Sections~\ref{subsec:shmem_team_config_t} and \ref{subsec:shmem_team_split_strided} for more detail about its use. 
\tabularnewline \hline From 04df4910b64de231e8c443e048f4a1f63c260b9f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 12 Dec 2018 11:42:04 -0600 Subject: [PATCH 086/319] Update team get config to remove comments --- content/shmem_team_get_config.tex | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/content/shmem_team_get_config.tex b/content/shmem_team_get_config.tex index 1610e6832..5a8e6f2e2 100644 --- a/content/shmem_team_get_config.tex +++ b/content/shmem_team_get_config.tex @@ -16,17 +16,8 @@ \apidescription{ \FUNC{shmem\_team\_get\_config} returns through the \VAR{config} argument -the configuration parameters of the given team, which were specified when the -team was created. - -\begin{FeedbackRequest} -A library implementation must apply all requested options to a team, even in -the event that the library does not make optimizations based on these options. -For example, suppose library implementation must always create teams with the same -overhead, no matter if the program disables collective support during team creation. -The library must still enable the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} option -when it is requested, so that the \openshmem program will be portable across implementations. -\end{FeedbackRequest} +the configuration parameters of the given team, which were assigned according +to input configuration parameters when the team was created. All \acp{PE} in the team will get back the same parameter values for the team options. @@ -40,8 +31,8 @@ \apinotes{ A use case for this function is to determine whether a given team can -support collective operations by testing for the \LibConstRef{SHMEM\_TEAM\_NOCOLLECTIVE} -option. When teams are created without support for collectives, they may still use +support collective operations. +When teams are created without support for collective operations, they may still use point to point operations to communicate and synchronize. 
So programmers may wish to design frameworks with functions that provide alternative algorithms for teams based on whether they do or do not support collectives. From d22a5ccc385f766a8081160117995d0dca7819b9 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 12 Dec 2018 11:42:39 -0600 Subject: [PATCH 087/319] Update teams intro to remove text for old disable collectives syntax --- content/shmem_teams_intro.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_teams_intro.tex b/content/shmem_teams_intro.tex index a92f0af83..3eae15b1b 100644 --- a/content/shmem_teams_intro.tex +++ b/content/shmem_teams_intro.tex @@ -55,8 +55,8 @@ synchronization requirements. \begin{itemize} \item There is a special case where all new teams resulting from a split operation are -created with the \LibHandleRef{SHMEM\_TEAM\_NOCOLLECTIVE} option and are configured -to create \CONST{0} new contexts for the team. This means the team will \emph{not} be used +created without support for collective operations and are configured to indicate that \CONST{0} new contexts will be created from the team. +This means the team will \emph{not} be used directly for point-to-point or collective communication routines, but only for \ac{PE} number translation and further split operations. When creating new teams of this type, no synchronization will be required around team creation operations. @@ -73,7 +73,7 @@ complete each split operation on $T_0$ before the next split operation on $T_0$ commences. \item In the case where the above conditions hold to require a barrier on $T_0$, but the parent team $T_0$ cannot be used for barriers due to having -been created with the \LibHandleRef{SHMEM\_TEAM\_NOCOLLECTIVE} option, the program may use any +been created with collective operations disabled, the program may use any ancestor team, e.g. \LibHandleRef{SHMEM\_TEAM\_WORLD}, for synchronization around new team creation. 
The program may alternatively use some other custom synchronization method as long as it ensures that each split collective completes for all \acp{PE} From fd54dd320325dc67f1cf0f70e2857a931eb5f91e Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Thu, 24 May 2018 09:56:36 -0500 Subject: [PATCH 088/319] Add initial support for put with signal --- content/shmem_put_signal.tex | 98 +++++++++++++++++++++++++ example_code/shmem_put_signal_example.c | 60 +++++++++++++++ main_spec.tex | 3 + utils/defs.tex | 1 + 4 files changed, 162 insertions(+) create mode 100644 content/shmem_put_signal.tex create mode 100644 example_code/shmem_put_signal_example.c diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex new file mode 100644 index 000000000..8cc9e3841 --- /dev/null +++ b/content/shmem_put_signal.tex @@ -0,0 +1,98 @@ +\color{Green} +\apisummary{ + The put with signal routines provide a method for copying data from a + contiguous local data object to a data object on a specified \ac{PE} + and set a remote flag to signal completion. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
+ +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. + +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +\end{CsynopsisCol} +where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. + +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +\end{CsynopsisCol} + +\begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This + data object must be remotely accessible.} + \apiargument{IN}{source}{Data object containing the data to be copied.} + \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using + \Fortran, it must be a constant, variable, or array element of default + integer type.} + \apiargument{OUT}{sig\_addr}{Signal data object to be updated on the remote + \ac{PE} to be updated as the signal. 
This signal data object must be + remotely accessible and it can be in the same or differnt memory segment + as the \VAR{dest} data object.} + \apiargument{IN}{signal}{Unsigned 64-bit value used to set the remote + \VAR{sig\_addr} signal data object.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be + of type integer. When using \Fortran, it must be a constant, variable, + or array element of default integer type.} +\end{apiarguments} + +\apidescription{ + The routines return after the data has been copied out of the \source{} + array on the local \ac{PE}. The delivery of \signal flag on the remote + \ac{PE} guarantees the delivery of data words into the data object on the + remote \ac{PE}. Furthermore, two successive put with signal routines or + a successive put followed by a put with signal routine may deliver data + out of order unless a call to \FUNC{shmem\_fence} is introduced between + the two calls and the delivery of the \signal flag on the remote \ac{PE} + guarantees only the delivery of its corresponding data object on the + remote \ac{PE}. + } + +\apidesctable{ + The \dest{} and \source{} data objects must conform to certain typing + constraints, which are as follows:} + {Routine}{Data type of \VAR{dest} and \VAR{source}} + \apitablerow{shmem\_putmem}{Any data type. nelems is scaled in bytes.} + \apitablerow{shmem\_put8}{Any noncharacter type that + has a storage size equal to \CONST{8} bits.} + \apitablerow{shmem\_put16}{Any noncharacter type that + has a storage size equal to \CONST{16} bits.} + \apitablerow{shmem\_put32}{Any noncharacter type + that has a storage size equal to \CONST{32} bits.} + \apitablerow{shmem\_put64}{Any noncharacter type that + has a storage size equal to \CONST{64} bits.} + \apitablerow{shmem\_put128}{Any noncharacter type that has a + storage size equal to \CONST{128} bits.} + +\apireturnvalues{ + None. 
+} +\apinotes{ +} + +\begin{apiexamples} + +\apicexample + { The following \FUNC{shmem\_put\_signal} example is for \Cstd[11] programs:} + {./example_code/shmem_put_signal_example.c} + {} +\end{apiexamples} + +\end{apidefinition} +\color{Black} diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c new file mode 100644 index 000000000..f43e615e3 --- /dev/null +++ b/example_code/shmem_put_signal_example.c @@ -0,0 +1,60 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <shmem.h> + +#define ITERATIONS (100) +#define MAX_SIZE (2<<18) + +int +main(int argc, char* argv[]) +{ + shmem_init(); + + int me = shmem_my_pe(); + int n = shmem_n_pes(); + int r = ITERATIONS; + size_t bloat = MAX_SIZE; + size_t size; + + for (size = 1; size < bloat; size*=2) { + uint64_t* message = malloc(size * sizeof(uint64_t)); + uint64_t* data = shmem_malloc(r * size * sizeof(uint64_t)); + uint64_t* signals = shmem_malloc(r * sizeof(uint64_t)); + + memset(message, 0, size * sizeof(uint64_t)); + memset(data, 0, r * size * sizeof(uint64_t)); + memset(signals, 0, r * sizeof(uint64_t)); + shmem_barrier_all(); + + message[0] = 10; + int i; + for (i = 0; i < r; i++) { + int j = i - (me == 0); + if (j >= 0) { + shmem_long_wait_until((long *)&signals[j], + SHMEM_CMP_EQ, 1); + message[0] = data[j * size] + 10; + } + int pe = (me + 1) % n; + shmemx_putmem_signal(&data[i * size], message, + size * sizeof(uint64_t), + &signals[i], 1, pe); + } + if (me == 0) { + shmem_long_wait_until((long *)&signals[r-1], + SHMEM_CMP_EQ, 1); + printf("Final message = %lu for size %zu\n", + data[(r-1) * size], size); + } + + shmem_barrier_all(); + shmem_free(signals); + shmem_free(data); + free(message); + shmem_barrier_all(); + } + + shmem_finalize(); + return 0; +} diff --git a/main_spec.tex b/main_spec.tex index d27dae188..2f8e7c4c1 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -170,6 +170,9 @@ \subsubsection{\textbf{SHMEM\_P}}\label{subsec:shmem_p} 
\subsubsection{\textbf{SHMEM\_IPUT}}\label{subsec:shmem_iput} \input{content/shmem_iput.tex} +\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} +\input{content/shmem_put_signal.tex} + \subsubsection{\textbf{SHMEM\_GET}}\label{subsec:shmem_get} \input{content/shmem_get.tex} diff --git a/utils/defs.tex b/utils/defs.tex index d7161ecdb..b261712bd 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -59,6 +59,7 @@ \newcommand{\source}{\textit{source}} \newcommand{\dest}{\textit{dest}} +\newcommand{\signal}{\textit{signal}} \newcommand{\PUT}{\textit{Put}} \newcommand{\GET}{\textit{Get}} \newcommand{\OPR}[1]{\textit{#1}} From 2c4075cb6bff0d9843ea8d70d251731314daeee5 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Thu, 24 May 2018 13:27:01 -0500 Subject: [PATCH 089/319] Update put+signal after RMA WG meeting --- content/shmem_put_signal.tex | 57 +++++++++++++----------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 8cc9e3841..5efd326f2 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -38,58 +38,43 @@ data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using - \Fortran, it must be a constant, variable, or array element of default - integer type.} - \apiargument{OUT}{sig\_addr}{Signal data object to be updated on the remote - \ac{PE} to be updated as the signal. This signal data object must be - remotely accessible and it can be in the same or differnt memory segment - as the \VAR{dest} data object.} + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} + \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote + \ac{PE} as the signal. 
This signal data object must be + remotely accessible.} \apiargument{IN}{signal}{Unsigned 64-bit value used to set the remote \VAR{sig\_addr} signal data object.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be - of type integer. When using \Fortran, it must be a constant, variable, - or array element of default integer type.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ The routines return after the data has been copied out of the \source{} - array on the local \ac{PE}. The delivery of \signal flag on the remote - \ac{PE} guarantees the delivery of data words into the data object on the - remote \ac{PE}. Furthermore, two successive put with signal routines or - a successive put followed by a put with signal routine may deliver data - out of order unless a call to \FUNC{shmem\_fence} is introduced between - the two calls and the delivery of the \signal flag on the remote \ac{PE} - guarantees only the delivery of its corresponding data object on the - remote \ac{PE}. - } - -\apidesctable{ - The \dest{} and \source{} data objects must conform to certain typing - constraints, which are as follows:} - {Routine}{Data type of \VAR{dest} and \VAR{source}} - \apitablerow{shmem\_putmem}{Any data type. nelems is scaled in bytes.} - \apitablerow{shmem\_put8}{Any noncharacter type that - has a storage size equal to \CONST{8} bits.} - \apitablerow{shmem\_put16}{Any noncharacter type that - has a storage size equal to \CONST{16} bits.} - \apitablerow{shmem\_put32}{Any noncharacter type - that has a storage size equal to \CONST{32} bits.} - \apitablerow{shmem\_put64}{Any noncharacter type that - has a storage size equal to \CONST{64} bits.} - \apitablerow{shmem\_put128}{Any noncharacter type that has a - storage size equal to \CONST{128} bits.} + array on the local \ac{PE}. 
The delivery of \signal{} flag on the remote + \ac{PE} guarantees the delivery of its corresponding \dest{} data words + into the data object on the remote \ac{PE}. +} \apireturnvalues{ None. } + \apinotes{ + The \VAR{sig\_addr} data object can be in the same or different memory + segment as the \VAR{dest} data object. + + The delivery of \signal{} flag on the remote \ac{PE} guarantees only the + delivery of its corresponding \dest{} data words into the data object on + the remote \ac{PE}. For example, two successive put with signal routines + or a successive put followed by a put with signal routine may deliver data + out of order unless a call to \FUNC{shmem\_fence} is introduced between + the two calls. } \begin{apiexamples} \apicexample - { The following \FUNC{shmem\_put\_signal} example is for \Cstd[11] programs:} + { The following example is for the \FUNC{shmem\_put\_signal} usage for + ping-pong programs:} {./example_code/shmem_put_signal_example.c} {} \end{apiexamples} From 83fd993cfc0b9b1f96171b0be6a2a284a20a9eee Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Sat, 26 May 2018 03:36:10 -0500 Subject: [PATCH 090/319] RM memory segment reference from put_signal notes --- content/shmem_put_signal.tex | 8 ++++---- utils/defs.tex | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 5efd326f2..fa00ccd43 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -59,15 +59,15 @@ } \apinotes{ - The \VAR{sig\_addr} data object can be in the same or different memory - segment as the \VAR{dest} data object. + The \VAR{sig\_addr} data object can be placed in the symmetric data segment + or the symmetric heap which can be same or different from the \VAR{dest} + data object. The delivery of \signal{} flag on the remote \ac{PE} guarantees only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. 
For example, two successive put with signal routines or a successive put followed by a put with signal routine may deliver data - out of order unless a call to \FUNC{shmem\_fence} is introduced between - the two calls. + out of order. } \begin{apiexamples} diff --git a/utils/defs.tex b/utils/defs.tex index b261712bd..a33d57764 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -409,14 +409,14 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} { \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, aboveskip=0pt, belowskip=0pt}}{} @@ -425,7 +425,7 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} From 5680e752631f3703bda64df4b0d8304639c63664 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Sat, 26 May 2018 04:13:39 -0500 Subject: [PATCH 091/319] rm ping-pong and add ring-bcast for put_sig example --- content/shmem_put_signal.tex | 4 +- example_code/shmem_put_signal_example.c | 77 +++++++++++-------------- utils/defs.tex | 2 +- 3 files changed, 36 insertions(+), 47 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index fa00ccd43..c5b12772b 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -73,8 +73,8 @@ \begin{apiexamples} \apicexample - { The following example is for the \FUNC{shmem\_put\_signal} 
usage for - ping-pong programs:} + { The following example shows a simple ring-based broacast operation using + \FUNC{shmem\_put\_signal}:} {./example_code/shmem_put_signal_example.c} {} \end{apiexamples} diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index f43e615e3..5d0fd6c4d 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -1,60 +1,49 @@ #include #include -#include #include -#define ITERATIONS (100) -#define MAX_SIZE (2<<18) +#define MAX_SIZE (2<<10) +#define VAL_USED 10 int main(int argc, char* argv[]) { + int i, err_count = 0; + shmem_init(); + size_t size = MAX_SIZE; int me = shmem_my_pe(); int n = shmem_n_pes(); - int r = ITERATIONS; - size_t bloat = MAX_SIZE; - size_t size; - - for (size = 1; size < bloat; size*=2) { - uint64_t* message = malloc(size * sizeof(uint64_t)); - uint64_t* data = shmem_malloc(r * size * sizeof(uint64_t)); - uint64_t* signals = shmem_malloc(r * sizeof(uint64_t)); - - memset(message, 0, size * sizeof(uint64_t)); - memset(data, 0, r * size * sizeof(uint64_t)); - memset(signals, 0, r * sizeof(uint64_t)); - shmem_barrier_all(); - - message[0] = 10; - int i; - for (i = 0; i < r; i++) { - int j = i - (me == 0); - if (j >= 0) { - shmem_long_wait_until((long *)&signals[j], - SHMEM_CMP_EQ, 1); - message[0] = data[j * size] + 10; - } - int pe = (me + 1) % n; - shmemx_putmem_signal(&data[i * size], message, - size * sizeof(uint64_t), - &signals[i], 1, pe); - } - if (me == 0) { - shmem_long_wait_until((long *)&signals[r-1], - SHMEM_CMP_EQ, 1); - printf("Final message = %lu for size %zu\n", - data[(r-1) * size], size); - } - - shmem_barrier_all(); - shmem_free(signals); - shmem_free(data); - free(message); - shmem_barrier_all(); + int pe = (me + 1)%n; + + uint64_t* message = malloc(size * sizeof(uint64_t)); + uint64_t* data = shmem_malloc(size * sizeof(uint64_t)); + uint64_t* signals = shmem_malloc(sizeof(uint64_t)); + + signals[0] = 0; + for (i 
= 0; i < size; i++) { + message[i] = VAL_USED; + data[i] = 0; + } + shmem_barrier_all(); + + if (me != 0) { + shmem_long_wait_until((long *)&signals[0], SHMEM_CMP_EQ, 1); } + shmemx_putmem_signal(data, message, size*sizeof(uint64_t), + &signals[0], 1, pe); + + if (me == 0) { + shmem_long_wait_until((long *)&signals[0], SHMEM_CMP_EQ, 1); + printf("BCAST with put with signal is complete\n"); + } + + free(message); + shmem_free(data); + shmem_free(signals); + shmem_finalize(); - return 0; + return 0; } diff --git a/utils/defs.tex b/utils/defs.tex index a33d57764..60e8cd101 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -517,7 +517,7 @@ ##1 \lstinputlisting[language={C}, tabsize=2, basicstyle=\ttfamily\footnotesize, - morekeywords={size_t, ptrdiff_t, shmem_ctx_t, _Thread_local}]{##2} + morekeywords={size_t, ptrdiff_t, shmem_ctx_t, _Thread_local, uint64_t}]{##2} ##3 } \newcommand{\apifexample}[3]{ ##1 From 7c8e3bb21da5e66536c5dbf234d8864d87ee5a43 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Sat, 26 May 2018 04:23:04 -0500 Subject: [PATCH 092/319] add changelog for blocking put_signal --- content/backmatter.tex | 3 +++ 1 file changed, 3 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 647278eac..107652419 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -510,6 +510,9 @@ \section{Version 1.5} The following list describes the specific changes in \openshmem[1.5]: \begin{itemize} % +\item Added support for blocking put with signal functions. +\\ See Section \ref{subsec:shmem_put_signal}. +% \item Specified the validity of communication contexts, added the constant \CONST{SHMEM\_CTX\_INVALID}, and clarified the behavior of \FUNC{shmem\_ctx\_*} routines on invalid contexts. 
From 86981ad9a433664c1118a082fafd1520374b5549 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 29 Jun 2018 09:43:31 -0500 Subject: [PATCH 093/319] Implement initial reviews for put-with-sig --- content/shmem_put_signal.tex | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index c5b12772b..dd8e7c3da 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -1,8 +1,8 @@ \color{Green} \apisummary{ - The put with signal routines provide a method for copying data from a + The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and set a remote flag to signal completion. + and subsequently setting a remote flag to signal completion. } \begin{apidefinition} @@ -59,21 +59,23 @@ } \apinotes{ - The \VAR{sig\_addr} data object can be placed in the symmetric data segment - or the symmetric heap which can be same or different from the \VAR{dest} - data object. + The \VAR{dest} and \VAR{sig\_addr} data object must both be remotely + accessible, but may each be allocated from the symmetric heap or global/ + static memory. - The delivery of \signal{} flag on the remote \ac{PE} guarantees only the + The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on - the remote \ac{PE}. For example, two successive put with signal routines - or a successive put followed by a put with signal routine may deliver data - out of order. + the remote \ac{PE}. Without a memory-ordering operation, there is no implied + ordering between the delivery of the signal word of a put-with-signal + routine and another data transfer. 
For example, the delivery of the signal + word in a sequence consisting of a put routine followed by a put-with-signal + routine does not imply delivery of the put routine's data. } \begin{apiexamples} \apicexample - { The following example shows a simple ring-based broacast operation using + { The following example shows a simple ring-based broadcast operation using \FUNC{shmem\_put\_signal}:} {./example_code/shmem_put_signal_example.c} {} From 643f4d0d0f89351fa232faeb49665da0ae13585a Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 29 Jun 2018 09:46:13 -0500 Subject: [PATCH 094/319] Implement reviews for put-with-sig - draft 2 --- content/shmem_put_signal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index dd8e7c3da..6e366193d 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -50,7 +50,7 @@ \apidescription{ The routines return after the data has been copied out of the \source{} array on the local \ac{PE}. The delivery of \signal{} flag on the remote - \ac{PE} guarantees the delivery of its corresponding \dest{} data words + \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. 
} From fce2079fce5cd0c7ab3ccd38655b21d6fff4697b Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 23 Jul 2018 16:01:49 -0500 Subject: [PATCH 095/319] Fix example code after put-with-signal reading --- example_code/shmem_put_signal_example.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index 5d0fd6c4d..cd10da1f9 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -1,48 +1,41 @@ #include -#include #include +#include -#define MAX_SIZE (2<<10) -#define VAL_USED 10 - -int -main(int argc, char* argv[]) +int main(void) { int i, err_count = 0; shmem_init(); - size_t size = MAX_SIZE; + size_t size = (2<<10); int me = shmem_my_pe(); int n = shmem_n_pes(); int pe = (me + 1)%n; - uint64_t* message = malloc(size * sizeof(uint64_t)); uint64_t* data = shmem_malloc(size * sizeof(uint64_t)); - uint64_t* signals = shmem_malloc(sizeof(uint64_t)); + static uint64_t sig_addr = 0; - signals[0] = 0; for (i = 0; i < size; i++) { - message[i] = VAL_USED; + message[i] = me; data[i] = 0; } shmem_barrier_all(); if (me != 0) { - shmem_long_wait_until((long *)&signals[0], SHMEM_CMP_EQ, 1); + shmem_uint64_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); } - shmemx_putmem_signal(data, message, size*sizeof(uint64_t), - &signals[0], 1, pe); + shmem_putmem_signal(data, message, size*sizeof(uint64_t), + &sig_addr, 1, pe); if (me == 0) { - shmem_long_wait_until((long *)&signals[0], SHMEM_CMP_EQ, 1); + shmem_uint64_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); printf("BCAST with put with signal is complete\n"); } free(message); shmem_free(data); - shmem_free(signals); shmem_finalize(); return 0; From a67a5f3b084a53f6b45ffedb59c15e155392ccb6 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Tue, 7 Aug 2018 13:09:03 -0500 Subject: [PATCH 096/319] Reword signal argument defn as per review --- 
content/shmem_put_signal.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 6e366193d..dfa214c20 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -42,8 +42,8 @@ \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote \ac{PE} as the signal. This signal data object must be remotely accessible.} - \apiargument{IN}{signal}{Unsigned 64-bit value used to set the remote - \VAR{sig\_addr} signal data object.} + \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the + remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} From 8f55c039d4a6b9891628b8a4ac70ca13ff429500 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Wed, 8 Aug 2018 08:08:14 -0500 Subject: [PATCH 097/319] Update sig_addr dst data object kind explanation --- content/shmem_put_signal.tex | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index dfa214c20..86481bd6b 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -59,9 +59,10 @@ } \apinotes{ - The \VAR{dest} and \VAR{sig\_addr} data object must both be remotely - accessible, but may each be allocated from the symmetric heap or global/ - static memory. + The \VAR{dest} and \VAR{sig\_addr} data objects must both be remotely + accessible. The \VAR{sig\_addr} and \VAR{dest} could be of different kinds, + for example, one could be a global/static \Cstd variable and the other could + be allocated on the symmetric heap. 
The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on From dc36e68facac42b9b3e9a8d3530e6bc77f524d5d Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 10 Sep 2018 12:55:41 -0500 Subject: [PATCH 098/319] Add put-with-signal in p2p-sync intro --- content/p2p_sync_intro.tex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 8855716ab..11cf4474b 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -3,8 +3,9 @@ object. The point-to-point synchronization routines can be used to portably ensure that memory access operations observe remote updates in the order enforced by -the initiator \ac{PE} using the \FUNC{shmem\_fence} and \FUNC{shmem\_quiet} -routines. +the initiator \ac{PE} using the put-with-signal(refer +section~\ref{subsec:shmem_put_signal}, \FUNC{shmem\_fence} and +\FUNC{shmem\_quiet} routines. Where appropriate compiler support is available, \openshmem provides type-generic point-to-point synchronization interfaces via \Cstd[11] generic From 34a33e5f8e2554be3da711ed196b97c4d4f120e7 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 10 Sep 2018 13:06:33 -0500 Subject: [PATCH 099/319] Add restrict qualifier to sig_addr We are adding the restrict qualifier to sig_addr data objects and expect it to not overlap or be the same as the dest or source data objects. 
--- content/shmem_put_signal.tex | 21 +++++++++++---------- utils/defs.tex | 9 ++++++--- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 86481bd6b..f4e6bb992 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} @@ -8,26 +7,26 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} \begin{apiarguments} @@ -64,6 +63,9 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. + The restrict qualifier in \VAR{sig\_addr} expects the data object to be + distinct from \VAR{dest} and \VAR{source} data objects. + The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. 
Without a memory-ordering operation, there is no implied @@ -83,4 +85,3 @@ \end{apiexamples} \end{apidefinition} -\color{Black} diff --git a/utils/defs.tex b/utils/defs.tex index 60e8cd101..d39b6303e 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -409,14 +409,16 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, uint64_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, + uint64_t, restrict}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} { \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + uint64_t, restrict}, aboveskip=0pt, belowskip=0pt}}{} @@ -425,7 +427,8 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + uint64_t, restrict}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} From ecfdb213adc1116073829960390374b5da1cc131 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 16:19:35 -0500 Subject: [PATCH 100/319] RM reference to put-with-signal from p2p sync intro Previously, we had a statement (Refer Section...) in the p2p sync intro section. Removing that reference and just using the routine name directly. --- content/p2p_sync_intro.tex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 11cf4474b..c2a2e1dbd 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -3,8 +3,7 @@ object. 
The point-to-point synchronization routines can be used to portably ensure that memory access operations observe remote updates in the order enforced by -the initiator \ac{PE} using the put-with-signal(refer -section~\ref{subsec:shmem_put_signal}, \FUNC{shmem\_fence} and +the initiator \ac{PE} using the put-with-signal, \FUNC{shmem\_fence} and \FUNC{shmem\_quiet} routines. Where appropriate compiler support is available, \openshmem provides From 5c67d334fb7c4d44cc5e3d7d382b5d245bbb19ac Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 16:23:06 -0500 Subject: [PATCH 101/319] RM restrict qualifier from the put-with-signal usage --- content/shmem_put_signal.tex | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index f4e6bb992..f446e9b1f 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -7,26 +7,26 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} \begin{apiarguments} @@ -63,9 +63,6 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The restrict qualifier in \VAR{sig\_addr} expects the data object to be - distinct from \VAR{dest} and \VAR{source} data objects. - The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied From 8cd7a55420435b1afeb7caec6736389cd4a8155c Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 16:27:12 -0500 Subject: [PATCH 102/319] Add new context arg explanation in PWS --- content/shmem_put_signal.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index f446e9b1f..3360d1c30 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -30,9 +30,9 @@ \end{CsynopsisCol} \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. 
When this argument is not provided, the operation is + performed on the default context.} \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} From 47bc318046c3c543e64f076e3b664c105dab29cd Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 17:02:33 -0500 Subject: [PATCH 103/319] Fix review comments for PWS example 1. Use calloc and avoid barriers from malloc and explicit calls 2. use all C11 generic shmem calls 3. Follow shmem bcast semantics - to bcast to source itself 4. convert wavefront-like transfer semantics to true bcast --- example_code/shmem_put_signal_example.c | 31 +++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index cd10da1f9..22c1f4dff 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -1,37 +1,34 @@ -#include #include +#include #include +#include int main(void) { - int i, err_count = 0; + int i, err_count = 0; shmem_init(); - size_t size = (2<<10); - int me = shmem_my_pe(); - int n = shmem_n_pes(); - int pe = (me + 1)%n; - uint64_t* message = malloc(size * sizeof(uint64_t)); - uint64_t* data = shmem_malloc(size * sizeof(uint64_t)); + size_t size = 2048; + int me = shmem_my_pe(); + int n = shmem_n_pes(); + int pe = (me + 1) % n; + uint64_t * message = malloc(size * sizeof(uint64_t)); static uint64_t sig_addr = 0; for (i = 0; i < size; i++) { message[i] = me; - data[i] = 0; } - shmem_barrier_all(); + + uint64_t *data = shmem_calloc(size, sizeof(uint64_t)); if (me != 0) { - shmem_uint64_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); + memcpy(message, data, size * sizeof(uint64_t)); } - shmem_putmem_signal(data, message, size*sizeof(uint64_t), - &sig_addr, 1, pe); - 
- if (me == 0) { - shmem_uint64_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); - printf("BCAST with put with signal is complete\n"); + if (me != (n - 1)) { + shmem_put_signal(data, message, size, &sig_addr, 1, pe); } free(message); From 246605310212340f9ec0b2c6bb972e3437682554 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 17:11:56 -0500 Subject: [PATCH 104/319] Duplicate PSW explanation from API summary to description --- content/shmem_put_signal.tex | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 3360d1c30..5a7a5fbd4 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -47,10 +47,13 @@ \end{apiarguments} \apidescription{ - The routines return after the data has been copied out of the \source{} - array on the local \ac{PE}. The delivery of \signal{} flag on the remote - \ac{PE} indicates the delivery of its corresponding \dest{} data words - into the data object on the remote \ac{PE}. + The put-with-signal routines provide a method for copying data from a + contiguous local data object to a data object on a specified \ac{PE} + and subsequently setting a remote flag to signal completion. The routines + return after the data has been copied out of the \source{} array on the + local \ac{PE}. The delivery of \signal{} flag on the remote \ac{PE} + indicates the delivery of its corresponding \dest{} data words into the + data object on the remote \ac{PE}. } \apireturnvalues{ From 2662d767241a080699562c44d3dd850000d7671a Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 11 Oct 2018 11:23:04 -0500 Subject: [PATCH 105/319] Update the put-with-signal example Previously, we performed bcast example with SHMEM bcast semantics, without any transfer to PE-0. Now, we perform bcast from PE-0 to all other PEs and itself. 
--- content/shmem_put_signal.tex | 6 ++++-- example_code/shmem_put_signal_example.c | 6 ++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 5a7a5fbd4..e8edb013d 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -78,8 +78,10 @@ \begin{apiexamples} \apicexample - { The following example shows a simple ring-based broadcast operation using - \FUNC{shmem\_put\_signal}:} + {The following example demonstrates the usage of \FUNC{shmem\_put\_signal}. + It shows the implementation of a broadcast operation from \ac{PE} 0 to + itself and all other \acp{PE} in the job as a simple ring-based algorithm + using \FUNC{shmem\_put\_signal}:} {./example_code/shmem_put_signal_example.c} {} \end{apiexamples} diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index 22c1f4dff..202ccbf0d 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -24,10 +24,8 @@ int main(void) if (me != 0) { shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); - memcpy(message, data, size * sizeof(uint64_t)); - } - - if (me != (n - 1)) { + shmem_put_signal(data, data, size, &sig_addr, 1, pe); + } else { shmem_put_signal(data, message, size, &sig_addr, 1, pe); } From 266564e2ab17bc5c066d20888bcb9ce7c22c9dac Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 18 Oct 2018 12:30:41 -0500 Subject: [PATCH 106/319] Adding overlapping semantics in put-with-signal --- content/shmem_put_signal.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index e8edb013d..e657a7a00 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -66,6 +66,8 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. 
+ The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. + The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied From 9e879c41709b0ffa2d8d9c9d4f6c4c6d50b0fc4b Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 22 Oct 2018 13:04:18 -0500 Subject: [PATCH 107/319] Reorder RMA operations in put-with-signal example Performing some quick cleanup on the put-with-signal example. --- example_code/shmem_put_signal_example.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index 202ccbf0d..a0a4ed36b 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -22,11 +22,11 @@ int main(void) uint64_t *data = shmem_calloc(size, sizeof(uint64_t)); - if (me != 0) { + if (me == 0) { + shmem_put_signal(data, message, size, &sig_addr, 1, pe); + } else { shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); shmem_put_signal(data, data, size, &sig_addr, 1, pe); - } else { - shmem_put_signal(data, message, size, &sig_addr, 1, pe); } free(message); From 6c20a843aec34ef623af9f30eb94fd3298d28f1d Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Tue, 8 Jan 2019 12:18:58 -0600 Subject: [PATCH 108/319] Explicitly state the signal update is AMO Based on recent review comments, it looks like it would be more clear if we state that the signal update is an atomic operation. We have added this as part of the Notes to Implementers section. 
--- content/shmem_put_signal.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index e657a7a00..15f9f0e4a 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -77,6 +77,13 @@ routine does not imply delivery of the put routine's data. } +\apiimpnotes{ + Implementations must ensure that put-with-signal routines are compatible + with all point-to-point synchronization interfaces. The delivery of + \signal{} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \signal{} flag to be an atomic memory operation. +} + \begin{apiexamples} \apicexample From 441d681208548d0b32a76f97424cc8fd9ff14a0f Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Wed, 9 Jan 2019 14:42:28 -0600 Subject: [PATCH 109/319] Move NBI put-with-signal apiimpnotes to apinotes Previously, we had the information about the signal updates atomicity guarantees in the notes to implementors section for put-with-signal. We are not now moving this into main notes section. We have also clarified the atomicity guarantees by refering to atomicty section. --- content/shmem_put_signal.tex | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 15f9f0e4a..17c461b7c 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -75,13 +75,12 @@ routine and another data transfer. For example, the delivery of the signal word in a sequence consisting of a put routine followed by a put-with-signal routine does not imply delivery of the put routine's data. -} -\apiimpnotes{ - Implementations must ensure that put-with-signal routines are compatible - with all point-to-point synchronization interfaces. The delivery of - \signal{} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \signal{} flag to be an atomic memory operation. 
+ The put-with-signal routines are compatible with all point-to-point + synchronization interfaces. The delivery of \VAR{signal} flag on the remote + \ac{PE} must not cause partial updates. This requires the update on + \VAR{signal} flag to be an atomic operation, with atomicity guarantees + described in Section~\ref{subsec:amo_guarantees}. } \begin{apiexamples} From c8ab4837c7f8feccb9b36cdab8c533cbae011036 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 12:32:54 -0600 Subject: [PATCH 110/319] Change put-with-signal in backmatter --- content/backmatter.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 107652419..915d9947a 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -510,7 +510,7 @@ \section{Version 1.5} The following list describes the specific changes in \openshmem[1.5]: \begin{itemize} % -\item Added support for blocking put with signal functions. +\item Added support for blocking put-with-signal functions. \\ See Section \ref{subsec:shmem_put_signal}. % \item Specified the validity of communication contexts, added the constant From 45fef4a5c41bc6410676819278049070de5c1428 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 12:57:18 -0600 Subject: [PATCH 111/319] RM unnecessary \signal and use \VAR{signal} instead --- content/shmem_put_signal.tex | 4 ++-- utils/defs.tex | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 17c461b7c..38ba8f51f 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -51,7 +51,7 @@ contiguous local data object to a data object on a specified \ac{PE} and subsequently setting a remote flag to signal completion. The routines return after the data has been copied out of the \source{} array on the - local \ac{PE}. 
The delivery of \signal{} flag on the remote \ac{PE} + local \ac{PE}. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. } @@ -68,7 +68,7 @@ The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. - The delivery of \signal{} flag on the remote \ac{PE} indicates only the + The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied ordering between the delivery of the signal word of a put-with-signal diff --git a/utils/defs.tex b/utils/defs.tex index d39b6303e..8bc13b224 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -59,7 +59,6 @@ \newcommand{\source}{\textit{source}} \newcommand{\dest}{\textit{dest}} -\newcommand{\signal}{\textit{signal}} \newcommand{\PUT}{\textit{Put}} \newcommand{\GET}{\textit{Get}} \newcommand{\OPR}[1]{\textit{#1}} From 5ad32c7ec612dd170c721718f8d7b41ed0b704a0 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 13:00:46 -0600 Subject: [PATCH 112/319] Fix \VAR usage correctly for dest and source --- content/shmem_put_signal.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 38ba8f51f..ddffa1a00 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -36,7 +36,7 @@ \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} - \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. 
\VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote \ac{PE} as the signal. This signal data object must be @@ -61,12 +61,12 @@ } \apinotes{ - The \VAR{dest} and \VAR{sig\_addr} data objects must both be remotely - accessible. The \VAR{sig\_addr} and \VAR{dest} could be of different kinds, + The \dest{} and \VAR{sig\_addr} data objects must both be remotely + accessible. The \VAR{sig\_addr} and \dest{} could be of different kinds, for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. + The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on From ada81c9327c699b72688ba586652f00b87b8b9ce Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 13:06:34 -0600 Subject: [PATCH 113/319] RM unnecessary restrict qualifier from macros --- utils/defs.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/defs.tex b/utils/defs.tex index 8bc13b224..7772da02d 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -409,7 +409,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, - uint64_t, restrict}, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} @@ -417,7 +417,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - uint64_t, restrict}, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} @@ -427,7 +427,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, 
- uint64_t, restrict}, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} From 0908edaacb1f0ab2d57cf5ddfeb6328942c08de7 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 14 Jan 2019 22:27:45 -0600 Subject: [PATCH 114/319] Reframe signal-put compatibility with p2p syncs --- content/shmem_put_signal.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index ddffa1a00..2956ae879 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -76,11 +76,11 @@ word in a sequence consisting of a put routine followed by a put-with-signal routine does not imply delivery of the put routine's data. - The put-with-signal routines are compatible with all point-to-point - synchronization interfaces. The delivery of \VAR{signal} flag on the remote - \ac{PE} must not cause partial updates. This requires the update on - \VAR{signal} flag to be an atomic operation, with atomicity guarantees - described in Section~\ref{subsec:amo_guarantees}. + The signal set by the put-with-signal routines is compatible + with all point-to-point synchronization interfaces. The delivery of + \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \VAR{signal} flag to be an atomic operation, with + atomicity guarantees described in Section~\ref{subsec:amo_guarantees}. 
} \begin{apiexamples} From 5f582566c66c3e2e8220fae5fa7eb19e8712528f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 7 Feb 2019 10:23:21 -0600 Subject: [PATCH 115/319] Update collective intro to describe teams synch and threading model --- content/collective_intro.tex | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 5365794f4..447a69ffa 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -28,15 +28,20 @@ or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to a team-based collective routine, the behavior is undefined. -Team objects encapsulate the system resources required to complete team-based collective routines. -On completion of a team-based collective call, the team resources on the calling -\ac{PE} will be ready for the next collective call. However, other \acp{PE} in the -team may still be participating in the collective call, and therefore team -resources may still be in use on some \acp{PE} in the team after others have returned from -the collective routine. Before a subsequent call to a collective routine by the team, -the previous collective operation must be complete on all \acp{PE} in the team, -which can be ensured by a call to a synchronization routine, like \FUNC{shmem\_sync}, -by the team. +Team objects encapsulate the per \ac{PE} system resources required to complete +team-based collective routines. +All \openshmem teams-based collective calls are blocking routines which may use those +system resources. On completion of a team-based collective call, the \ac{PE} may +immediately call another collective on that same team without any other intervening +synchronization across the team. + +While \openshmem routines provide thread safety as per the requesting threading model +(see \ref{subsec:thread_support}), the teams object itself is not thread-safe. 
It is up +to the program to ensure that on a given \ac{PE}, there are no simultaneous calls to routines +on a given team object, including all team-based collective calls. + +Collective operations are matched across a given team based on ordering. So for a given team, +collectives must occur in the same order across all PEs in a team. The team-based collective routines defined in the \openshmem Specification are: @@ -50,6 +55,10 @@ \item \FUNC{shmem\_team\_alltoalls\{32, 64\}} \end{itemize} +In addition, all team creation functions are collective operations. In addition to the ordering +and thread safety requirements described here, there are additional synchronization requirements +on team creation operations. See section \ref{subsec:team} for more details. + The deprecated function \FUNC{shmem\_sync\_all} is provided for backward compatibility to synchronize all \acp{PE} in the computation. This should be replaced in applications by the equivalent \FUNC{shmem\_sync(SHMEM\_TEAM\_WORLD)}.
From 5707d64dff2b490fdb75c2619c0e1205d10f2d52 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 7 Feb 2019 13:19:35 -0600 Subject: [PATCH 116/319] Remove no collectives team options --- content/collective_intro.tex | 4 +- content/library_constants.tex | 20 ---------- content/shmem_alltoall.tex | 5 +-- content/shmem_alltoalls.tex | 3 +- content/shmem_broadcast.tex | 7 ++-- content/shmem_collect.tex | 7 +--- content/shmem_reductions.tex | 6 +-- content/shmem_sync.tex | 5 +-- content/shmem_team_config_t.tex | 28 +------------- content/shmem_team_get_config.tex | 7 +--- content/shmem_teams_intro.tex | 63 ++++++++++--------------------- 11 files changed, 33 insertions(+), 122 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 447a69ffa..de636c4c8 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -22,9 +22,7 @@ The team-based collective routines are performed with respect to a valid \openshmem team, which is specified by a team handle argument. Team-based collective operations require all \acp{PE} in the team to call -the routine in order for the operation to complete. Team-based collective routines -should not be passed team handles to teams created with a configuration -that disables support for collective operations. If such a team +the routine in order for the operation to complete. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to a team-based collective routine, the behavior is undefined. 
diff --git a/content/library_constants.tex b/content/library_constants.tex index e38f94622..5e38a9aa4 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -40,26 +40,6 @@ \tabularnewline \hline %% \color{Green} -\LibConstDecl{SHMEM\_TEAM\_NO\_COLLECTIVES} & -\color{Green} -The bitwise flag which specifies that a team creation routine should use the -\VAR{no\_collectives} member of the provided -\CTYPE{shmem\_team\_config\_t} configuration parameter as a requirement. -See Sections~\ref{subsec:shmem_team_config_t} and -\ref{subsec:shmem_team_split_strided} for more detail about its use. -\tabularnewline \hline -%% -\color{Green} -\LibConstDecl{SHMEM\_TEAM\_NO\_COLLECTIVES\_HINT} & -\color{Green} -The bitwise flag which specifies that a team creation routine should use the -\VAR{no\_collectives} member of the provided -\CTYPE{shmem\_team\_config\_t} configuration parameter as a hint. -See Sections~\ref{subsec:shmem_team_config_t} and -\ref{subsec:shmem_team_split_strided} for more detail about its use. -\tabularnewline \hline -%% -\color{Green} \LibConstDecl{SHMEM\_TEAM\_NUM\_CONTEXTS} & \color{Green} The bitwise flag which specifies that a team creation routine should use the diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index ce68d0f94..c3c3a500b 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -48,8 +48,7 @@ \Fortran, it must be a default integer value.} \newtext{% -\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been - created without disabling support for collective operations.} +\apiargument{IN}{team}{A valid \openshmem team handle to a team.} } \begin{DeprecateBlock} @@ -104,8 +103,6 @@ {\color{Green} Team-based collect routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in the collective. 
- If a team created without support for collectives is passed to this or any other - team collective routine, the behavior is undefined. Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index 3ac07b4e8..779103ea8 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -56,8 +56,7 @@ default integer value.} \newtext{% -\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been - created without disabling support for collective operations.} +\apiargument{IN}{team}{A valid \openshmem team handle.} } \begin{DeprecateBlock} diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 98c7a33ec..543fa29fd 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -91,10 +91,9 @@ passed by all \acp{PE} particpating in the collective operation. Team-based broadcast routines operate over all \acp{PE} in the provided team argument. All - \acp{PE} in the provided team must participate in the operation. If a team created without - support for collectives is passed to this or any other team collective routine, the - behavior is undefined. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} - is passed to this routine, the behavior is undefined. + \acp{PE} in the provided team must participate in the operation. + If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to this routine, + the behavior is undefined. As with all team-based \openshmem routines, \ac{PE} numbering is relative to the team. 
The specified root \ac{PE} must be a valid \ac{PE} diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index fdebe6fd2..beb881135 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -60,8 +60,7 @@ a default integer value.} \newtext{% -\apiargument{IN}{team}{A valid \openshmem team handle to a team which has been - created without disabling support for collective operations.} +\apiargument{IN}{team}{A valid \openshmem team handle.} } \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of @@ -110,9 +109,7 @@ {\color{Green} Team-based collect routines operate over all \acp{PE} in the provided team argument. All - \acp{PE} in the provided team must participate in the operation. If a team created without - support for collective operations is passed to this or any other team collective routine, the - behavior is undefined. + \acp{PE} in the provided team must participate in the operation. Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index b1d65b73e..751f258d5 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -336,10 +336,8 @@ {\color{Green} Team-based reduction routines operate over all \acp{PE} in the provided team argument. All - \acp{PE} in the provided team must participate in the reduction. If a team created without - support for collectives is passed to this or any other team collective routine, the - behavior is undefined. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} - is passed to this routine, the behavior is undefined. + \acp{PE} in the provided team must participate in the reduction. If an invalid team handle + or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to this routine, the behavior is undefined. 
Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index acd51db97..66c81c006 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -56,9 +56,8 @@ program, only the calling thread is blocked. Team-based sync routines operate over all \acp{PE} in the provided team argument. All - \acp{PE} in the provided team must participate in the sync operation. If a team created without - support for collectives is passed to this or any other team collective routine, the - behavior is undefined. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} + \acp{PE} in the provided team must participate in the sync operation. + If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to this routine, the behavior is undefined. Active-set-based sync routines operate over all \acp{PE} in the active set diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex index 7cd5bfded..f4cf7b438 100644 --- a/content/shmem_team_config_t.tex +++ b/content/shmem_team_config_t.tex @@ -6,7 +6,6 @@ \begin{Csynopsis} typedef struct { - int no_collectives; int num_contexts; } shmem_team_config_t; \end{Csynopsis} @@ -26,14 +25,6 @@ the actual configuration that the library was able to provide during team creation. - The \VAR{no\_collectives} member allows for teams to be created - without support for collective communications, which allows implementations - to reduce team creation overheads for those teams. - When its value is zero, it specifies that the team should have collective - operations enabled. - When nonzero, the team will not support collective operations, which - allows implementations to reduce team creation overheads. - The \VAR{num\_contexts} member specifies the total number of contexts created from this team that can simultaneously exist. 
These contexts may be created in any number of threads. A program @@ -56,16 +47,6 @@ masks with through a bitwise OR operation of the following library constants: { - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NO\_COLLECTIVES}}{ - The team should be created using the value of the - \VAR{no\_collectives} member of the configuration parameter - \VAR{config} as a requirement. - } - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NO\_COLLECTIVES\_HINT}}{ - The team should be created using the value of the - \VAR{no\_collectives} member of the configuration parameter - \VAR{config} as a hint. - } \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_CONTEXTS}}{ The team should be created using the value of the \VAR{num\_contexts} member of the configuration parameter @@ -86,9 +67,6 @@ parameters, as follows: { - \apitablerow{no\_collectives = \CONST{0}}{ - By default, teams support collective operations - } \apitablerow{num\_contexts = \CONST{0}}{ By default, no contexts can be created on a new team } @@ -97,11 +75,7 @@ } \apinotes{ - The library can set a team configuration to indicate that collective operations are - disabled without providing some optimized version of teams without support for - collective operations. Disabling collectives is a portable way to - provide optimizations on some implementations, not a promise that teams with - this option will reduce overheads on all implementations. + None. } \end{apidefinition} diff --git a/content/shmem_team_get_config.tex b/content/shmem_team_get_config.tex index 5a8e6f2e2..30827b7fb 100644 --- a/content/shmem_team_get_config.tex +++ b/content/shmem_team_get_config.tex @@ -30,12 +30,7 @@ } \apinotes{ -A use case for this function is to determine whether a given team can -support collective operations. -When teams are created without support for collective operations, they may still use -point to point operations to communicate and synchronize. 
So programmers may wish -to design frameworks with functions that provide alternative algorithms -for teams based on whether they do or do not support collectives. + None. } \end{apidefinition} diff --git a/content/shmem_teams_intro.tex b/content/shmem_teams_intro.tex index 3eae15b1b..b7d449411 100644 --- a/content/shmem_teams_intro.tex +++ b/content/shmem_teams_intro.tex @@ -29,8 +29,8 @@ the parent team when creating new \openshmem teams. Every \ac{PE} is a member of the default team, which may be referenced -through the team handle \LibHandleRef{SHMEM\_TEAM\_WORLD}, -and its number in the default team is equal to the +through the team handle \LibHandleRef{SHMEM\_TEAM\_WORLD}. +The \ac{PE} number in the default team is equal to the value of its \ac{PE} number as returned by \FUNC{shmem\_my\_pe}. A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, may be used to @@ -39,52 +39,27 @@ declared team handles as a sentinel value. Teams that are created by a \FUNC{shmem\_team\_split\_*} routine may be -provided a configuration argument that specifies options that may affect -a team's capabilities and may allow for optimized performance. +provided a configuration argument that specifies team creation options. This configuration argument is of type \CTYPE{shmem\_team\_config\_t}, which is detailed further in Section~\ref{subsec:shmem_team_config_t}. Team creation is a collective operation. As such, team creation in a multithreaded environment follows the same semantics as discussed in section -\ref{subsec:thread_support}. Like other collectives, team creation is an action -of the \ac{PE} as a whole, and it is up to the programmer to ensure that each -\ac{PE} has consistent and predictable ordering of team creation calls over -all of its threads. +\ref{subsec:coll}. That is, while \openshmem routines are thread-safe as +per the threading model (see section \ref{subsec:thread_support}), \openshmem +teams objects are not themselves thread-safe.
For team creation, this means +that the program must ensure that there are no simultaneous split operations +occurring on the same parent team on a given \ac{PE}. -In addition to ordering requirements on team creation, there may also be -synchronization requirements. -\begin{itemize} -\item There is a special case where all new teams resulting from a split operation are -created are configured without support for collective operations and indicating that \CONST{0} new contexts will be created from the team. -This means the team will \emph{not} be used -directly for point-to-point or collective communication routines, but only for -\ac{PE} number translation and further split operations. When creating new teams -of this type, no synchronization will be required around team creation operations. -\item In the case where a parent team uses multiple team split calls to create -multiple new teams which do not meet the above criteria for avoiding synchronization, -\emph{and} there is an overlap in team membership in the resulting new teams, all \acp{PE} -in the parent team must perform a barrier between team creation calls. -In other words, undefined behavior will result from allowing simultaneous execution of team -creation collective operations that split some team $T_0$ into multiple new -teams, $T_1, T_2...$, where some \ac{PE} $p$ exists such that $p \in T_1, p \in T_2, ...$. -By executing a barrier among the set of -all \acp{PE} in team $T_0$ between team creation operations, simultaneous execution -of split operations by those \acp{PE} is prevented. \ac{PE} $p$ will be ensured to -complete each split operation on $T_0$ before the next split operation on $T_0$ commences. -\item In the case where the above conditions hold to require a barrier on $T_0$, -but the parent team $T_0$ cannot be used for barriers due to having -been created with collective operations disabled, the program may use any -ancestor team, e.g.
\LibHandleRef{SHMEM\_TEAM\_WORLD}, for synchronization around new -team creation. The program may alternatively use some other custom synchronization method -as long as it ensures that each split collective completes for all \acp{PE} -in team $T_0$ before the next split on $T_0$ commences. -\end{itemize} +Like other collectives, team creation is matched across PEs based +on ordering. So, team creation events must occur in the same order on all \acp{PE} +in the resulting child teams. Additionally, there must not be team creation +operations from the same parent team simultaneously occurring that involve +the same \acp{PE} in any resulting child teams. In practice, this means that when a parent team +is split multiple times, and the resulting child teams have overlapping membership, +the program must call the \FUNC{shmem\_team\_sync} routine on the parent team +between subsequent calls to split routines. -%% Teams may be created with options that change team behavior and may allow for -%% more optimized performance. These options are described in -%% Section~\ref{subsec:library_constants} and in the various descriptions of the -%% team split routines below. In particular, teams may be created with an option -%% to disable support for collective communications, which allows implementations -%% to reduce team creation overheads for those teams. In that case, the team is -%% just a local reindexing of some set of \acp{PE} that can be used for -%% point to point communications or as parent teams in subsequent split operations. +Upon completion of a team creation operation, the resulting child teams will be +immediately usable for any team-based operations, including creating new child teams, +without any intervening synchronization.
From cb60b1ffb7e06c61a500ad966329f1e7e298a8aa Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 11 Feb 2019 12:16:05 -0600 Subject: [PATCH 117/319] Update error handling section to clarify behavior --- content/error_handling.tex | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/content/error_handling.tex b/content/error_handling.tex index d2bd93317..c3f0196b6 100644 --- a/content/error_handling.tex +++ b/content/error_handling.tex @@ -4,16 +4,10 @@ Because \openshmem defines asynchronous communication operations, errors may arise at any time as communications proceed. In these cases, the implementation might generate error messages or abort the application when errors occur. The \openshmem specification cannot define these types of errors, and leaves it to the implementation to determine how these types of errors should be handled. -Collective operations involving many \acp{PE} may return values indicating success while other \acp{PE} are still executing the collective operation. So, return values indicating success of a collective routine on one \ac{PE} do not by default indicate that all \acp{PE} involved in the collective operation will return from the routine successfully. If a collective routine provides any such global error checking behavior, it will be explicitly stated in the description of that routine. - -\openshmem implementations for production environments may do minimal internal error checking for performance reasons. Also, some errors may be undetectable or uncorrectable due to limitations in underlying technologies, resulting in undefined behaviors. For these reasons, \openshmem does not provide strict error checking guarantees in most cases. For \CorCpp routines returning integer error codes, implementations are guided, but not required, to do the following: - -\begin{itemize} -\item Return nonzero error codes and avoid application abort behaviors for resource allocation failure. 
-\item[] Examples of resource allocation failure are: context creation failure due to lack of network resources, team context creation failure due to lack of team resources, team creation failure due to lack of network or symmetric memory resources. -\item Return nonzero error codes when encountering errors that are likely transient and are not likely to result in the application or library becoming inconsistent. -\item Abort the application with an informative message if allowing it to continue would likely result in the application or library state becoming inconsistent. -\end{itemize} +Collective operations involving many \acp{PE} may return values indicating success while other \acp{PE} are still executing the collective operation. By default, return values indicating success of a collective routine on one \ac{PE} do not indicate that all \acp{PE} involved in the collective operation will return from the routine successfully. If a collective routine provides any such global error checking behavior, it will be explicitly stated in the description of that routine. +\openshmem implementations for production environments may do minimal internal error checking for performance reasons. Also, some errors may be undetectable or uncorrectable due to limitations in underlying technologies, resulting in undefined behaviors. For these reasons, \openshmem generally considers that when routine parameters are invalid or violate requirements specified in this document, behavior is undefined. Likewise, when ordering or synchronization of communication operations violates requirements specified in this document, behavior is undefined. +Applications are encouraged but not required to attempt to continue execution in the face of resource allocation errors, such as lack of network resources or memory resources. 
In these cases, if resource allocation fails inside a routine with an integer return code, applications should return some nonzero value, which may have implementation specific definition. If the routine has some other out parameter, such as pointer to a new memory allocation, routines may specify that the out parameter has some sentinel value to indicate failure to complete the operation. +If some routine specified in this document does not explicitly state resulting error behavior when a program violates the routine assumptions and requirements, then the behavior is undefined, and could include continuing execution regardless, aborting the application with an informative message, returning sentinel values in outgoing parameters, launching a debugger, tweeting the failure information as emoji, sounding a rather loud siren, or any other behavior which the implementation might find desirable. From 0624f69ac37bf31ae7370cbc254aba15c975c3c6 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 11 Feb 2019 12:16:41 -0600 Subject: [PATCH 118/319] Add detail of behavior for RMA with invalid PE number --- content/rma_intro.tex | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/content/rma_intro.tex b/content/rma_intro.tex index af4a3dacc..7d2f9445a 100644 --- a/content/rma_intro.tex +++ b/content/rma_intro.tex @@ -16,6 +16,15 @@ routine, \GET{}, the origin \ac{PE} provides the \dest{} data object and the destination \ac{PE} provides the \source{} data object. +\color{Green} +The destination \ac{PE} is specified as an integer representing the \ac{PE} number. +This \ac{PE} number is relative to the \openshmem team associated with the +communication context being using for the operation. If no context is passed to the routine, +then the \ac{PE} number is relative to the default team, and is the global \ac{PE} +number. 
If the \ac{PE} number passed to the routine is invalid, being either negative +or greater than the size of the \openshmem team, routine behavior is undefined. +\color{Black} + Where appropriate compiler support is available, \openshmem provides type-generic one-sided communication interfaces via \Cstd[11] generic selection (\Cstd[11]~\S6.5.1.1\footnote{Formally, the \Cstd[11] specification is ISO/IEC 9899:2011(E).}) From 25f7485e40997147e279ee7bd6996440cb52e03f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 20 Feb 2019 10:16:42 -0600 Subject: [PATCH 119/319] correct phrasing for pe number out of range --- content/rma_intro.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/rma_intro.tex b/content/rma_intro.tex index 7d2f9445a..479ca843f 100644 --- a/content/rma_intro.tex +++ b/content/rma_intro.tex @@ -21,8 +21,8 @@ This \ac{PE} number is relative to the \openshmem team associated with the communication context being using for the operation. If no context is passed to the routine, then the \ac{PE} number is relative to the default team, and is the global \ac{PE} -number. If the \ac{PE} number passed to the routine is invalid, being either negative -or greater than the size of the \openshmem team, routine behavior is undefined. +number. If the \ac{PE} number passed to the routine is invalid, being negative +or greater than or equal to the size of the \openshmem team, then routine behavior is undefined. \color{Black} Where appropriate compiler support is available, \openshmem provides type-generic From d7e08ae4aca3793bfa944cc31b5f31f2fbf29067 Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Wed, 20 Feb 2019 16:42:59 -0500 Subject: [PATCH 120/319] Add an updated shmem_sync example using teams API Signed-off-by: David M. 
Ozog --- example_code/shmem_sync_example.c | 43 ++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/example_code/shmem_sync_example.c b/example_code/shmem_sync_example.c index 8c447beee..3a2d65350 100644 --- a/example_code/shmem_sync_example.c +++ b/example_code/shmem_sync_example.c @@ -4,25 +4,48 @@ int main(void) { static int x = 10101; - static long pSync[SHMEM_BARRIER_SYNC_SIZE]; + shmem_team_config_t conf; + shmem_team_t twos_team, threes_team; shmem_init(); int me = shmem_my_pe(); int npes = shmem_n_pes(); - for (int i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; + int odd_npes = npes % 2; - shmem_sync_all(); + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, &conf, 0, &twos_team); + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 3, npes / 3 + odd_npes, &conf, 0, &threes_team); - if (me % 2 == 0) { - /* put to next even PE in a circular fashion */ - shmem_p(&x, 4, (me + 2) % npes); - /* synchronize all even pes */ + /* The teams overlap, so synchronize on the parent team */ + shmem_sync(SHMEM_TEAM_WORLD); + + int my_pe_twos = shmem_team_my_pe(twos_team); + int my_pe_threes = shmem_team_my_pe(threes_team); + + if (my_pe_twos != SHMEM_TEAM_NULL) { + /* put the value 2 to the next team member in a circular fashion */ + shmem_p(&x, 2, (my_pe_twos + 2) % npes); + shmem_quiet(); + shmem_sync(twos_team); + } + + if (my_pe_threes != SHMEM_TEAM_NULL) { + /* put the value 3 to the next team member in a circular fashion */ + shmem_p(&x, 3, (my_pe_threes + 3) % npes); shmem_quiet(); - shmem_sync(0, 1, (npes / 2 + npes % 2), pSync); + shmem_sync(threes_team); } - printf("%d: x = %d\n", me, x); + + if (me % 3 == 0 && x != 3) { + shmem_global_exit(3); + } + else if (me % 2 == 0 && x != 2) { + shmem_global_exit(2); + } + else if (x != 10101) { + shmem_global_exit(1); + } + shmem_finalize(); return 0; } From 01ba772c3966f5d9484e28b0cb0635d0253b0252 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam 
Ravichandrasekaran Date: Thu, 21 Feb 2019 10:56:10 -0600 Subject: [PATCH 121/319] Add split strided and translate team example --- example_code/shmem_team_split_strided.c | 35 +++++++++++++++++++++++++ example_code/shmem_team_translate.c | 35 +++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 example_code/shmem_team_split_strided.c create mode 100644 example_code/shmem_team_translate.c diff --git a/example_code/shmem_team_split_strided.c b/example_code/shmem_team_split_strided.c new file mode 100644 index 000000000..dfb0c8137 --- /dev/null +++ b/example_code/shmem_team_split_strided.c @@ -0,0 +1,35 @@ +/* + * OpenSHMEM shmem_team_split_strided example to create a team of all even + * ranked PEs from SHMEM_TEAM_WORLD + */ + +#include +#include + +int main(int argc, char *argv[]) +{ + int rank, npes; + int t_pe, t_size; + shmem_team_t new_team; + shmem_team_config_t *config; + + shmem_init(); + config = NULL; + rank = shmem_my_pe(); + npes = shmem_n_pes(); + + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, + &new_team); + + if (new_team != SHMEM_TEAM_NULL) { + t_size = shmem_team_n_pes(new_team); + t_pe = shmem_team_my_pe(new_team); + + if ((rank % 2 != 0) || (rank / 2 != t_pe) || (npes / 2 != t_size)) { + shmem_global_exit(1); + } + } + + shmem_finalize(); + return 0; +} diff --git a/example_code/shmem_team_translate.c b/example_code/shmem_team_translate.c new file mode 100644 index 000000000..bde9c8e4f --- /dev/null +++ b/example_code/shmem_team_translate.c @@ -0,0 +1,35 @@ +/* + * OpenSHMEM shmem_team_translate example to verify the team formed by even + * ranked PEs from SHMEM_TEAM_WORLD using the team created from + * shmem_team_split_stride operation + */ + +#include +#include + +int main(int argc, char *argv[]) +{ + int rank, npes; + int t_pe, t_size; + shmem_team_t new_team; + shmem_team_config_t *config; + + shmem_init(); + config = NULL; + rank = shmem_my_pe(); + + shmem_team_split_strided(SHMEM_TEAM_WORLD, 
0, 2, npes / 2, config, 0, + &new_team); + + if (new_team != SHMEM_TEAM_NULL) { + t_pe = shmem_team_my_pe(new_team); + t_global = shmem_team_translate(new_team, t_pe, SHMEM_TEAM_WORLD); + + if (t_global != rank) { + shmem_global_exit(1); + } + } + + shmem_finalize(); + return 0; +} From 56166360223fac2db1dc8a6c954f84027e851280 Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Thu, 21 Feb 2019 13:00:59 -0500 Subject: [PATCH 122/319] Fix bugs in shmem_sync example, improve formatting Signed-off-by: David M. Ozog --- example_code/shmem_sync_example.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/example_code/shmem_sync_example.c b/example_code/shmem_sync_example.c index 3a2d65350..27c3d6f47 100644 --- a/example_code/shmem_sync_example.c +++ b/example_code/shmem_sync_example.c @@ -4,34 +4,39 @@ int main(void) { static int x = 10101; - shmem_team_config_t conf; - shmem_team_t twos_team, threes_team; + + shmem_team_t twos_team, threes_team; + shmem_team_config_t *config; shmem_init(); + config = NULL; int me = shmem_my_pe(); int npes = shmem_n_pes(); int odd_npes = npes % 2; - shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, &conf, 0, &twos_team); - shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 3, npes / 3 + odd_npes, &conf, 0, &threes_team); + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, + &twos_team); /* The teams overlap, so synchronize on the parent team */ shmem_sync(SHMEM_TEAM_WORLD); + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 3, npes / 3 + odd_npes, + config, 0, &threes_team); + int my_pe_twos = shmem_team_my_pe(twos_team); int my_pe_threes = shmem_team_my_pe(threes_team); if (my_pe_twos != SHMEM_TEAM_NULL) { /* put the value 2 to the next team member in a circular fashion */ - shmem_p(&x, 2, (my_pe_twos + 2) % npes); + shmem_p(&x, 2, (me + 2) % npes); shmem_quiet(); shmem_sync(twos_team); } if (my_pe_threes != SHMEM_TEAM_NULL) { /* put the value 3 to the next team member in a 
circular fashion */ - shmem_p(&x, 3, (my_pe_threes + 3) % npes); + shmem_p(&x, 3, (me + 3) % npes); shmem_quiet(); shmem_sync(threes_team); } From d935d3fcef52c99ca774b83879fbfb289201c2d3 Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Thu, 21 Feb 2019 13:12:18 -0500 Subject: [PATCH 123/319] Clarify shmem_sync comment, improve formatting Signed-off-by: David M. Ozog --- example_code/shmem_sync_example.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/example_code/shmem_sync_example.c b/example_code/shmem_sync_example.c index 27c3d6f47..e6fb53e3e 100644 --- a/example_code/shmem_sync_example.c +++ b/example_code/shmem_sync_example.c @@ -9,8 +9,8 @@ int main(void) shmem_team_config_t *config; shmem_init(); - config = NULL; - int me = shmem_my_pe(); + config = NULL; + int me = shmem_my_pe(); int npes = shmem_n_pes(); int odd_npes = npes % 2; @@ -18,7 +18,8 @@ int main(void) shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, &twos_team); - /* The teams overlap, so synchronize on the parent team */ + /* The "threes" team below overlaps with the "twos" team, so + * synchronize on the parent team */ shmem_sync(SHMEM_TEAM_WORLD); shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 3, npes / 3 + odd_npes, From b84b099ac064213305a5d86c5d310ad76e5112a7 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 21 Feb 2019 12:38:15 -0600 Subject: [PATCH 124/319] RM unused var from team translate example --- example_code/shmem_team_translate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/example_code/shmem_team_translate.c b/example_code/shmem_team_translate.c index bde9c8e4f..37472f0d3 100644 --- a/example_code/shmem_team_translate.c +++ b/example_code/shmem_team_translate.c @@ -9,8 +9,9 @@ int main(int argc, char *argv[]) { - int rank, npes; - int t_pe, t_size; + int rank; + int t_pe; + int t_global; shmem_team_t new_team; shmem_team_config_t *config; From 
8bc966019cfb8b51069392fb27ae6f0153e2112b Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 21 Feb 2019 12:39:52 -0600 Subject: [PATCH 125/319] Indent team translate example --- example_code/shmem_team_translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example_code/shmem_team_translate.c b/example_code/shmem_team_translate.c index 37472f0d3..d1fb796fd 100644 --- a/example_code/shmem_team_translate.c +++ b/example_code/shmem_team_translate.c @@ -11,7 +11,7 @@ int main(int argc, char *argv[]) { int rank; int t_pe; - int t_global; + int t_global; shmem_team_t new_team; shmem_team_config_t *config; From aadf84e07e26311723c142371201fd21ebde521b Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 4 Mar 2019 18:45:24 -0600 Subject: [PATCH 126/319] RM unnecessary log file --- content/shmem_atomic_compare_swap_nbi.log | 2286 --------------------- 1 file changed, 2286 deletions(-) delete mode 100644 content/shmem_atomic_compare_swap_nbi.log diff --git a/content/shmem_atomic_compare_swap_nbi.log b/content/shmem_atomic_compare_swap_nbi.log deleted file mode 100644 index 1ac90f425..000000000 --- a/content/shmem_atomic_compare_swap_nbi.log +++ /dev/null @@ -1,2286 +0,0 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.16 (MiKTeX 2.9) (preloaded format=pdflatex 2016.4.29) 6 DEC 2018 11:36 -entering extended mode -**C:/Users/nravi/Documents/documents/wrk/src/std/nravi/content/shmem_atomic_com -pare_swap_nbi.tex - -(C:/Users/nravi/Documents/documents/wrk/src/std/nravi/content/shmem_atomic_comp -are_swap_nbi.tex -LaTeX2e <2015/10/01> patch level 2 -Babel <3.9n> and hyphenation patterns for 69 languages loaded. -! Undefined control sequence. -l.1 \color - {Green} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). 
Otherwise just continue, -and I'll forget about whatever was undefined. - - -! LaTeX Error: Missing \begin{document}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.1 \color{G - reen} -You're in trouble here. Try typing to proceed. -If that doesn't work, type X to quit. - -Missing character: There is no G in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -! Undefined control sequence. -l.2 \apisummary - { -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no T in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no k in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no g in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! 
-Missing character: There is no e in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! 
-Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no j in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no . in font nullfont! - -Overfull \hbox (20.0pt too wide) in paragraph at lines 1--6 -[] - [] - - -! LaTeX Error: Environment apidefinition undefined. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.7 \begin{apidefinition} - -Your command was ignored. -Type I to replace it with another command, -or to continue without it. - - -! LaTeX Error: Environment C11synopsis undefined. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.9 \begin{C11synopsis} - -Your command was ignored. -Type I to replace it with another command, -or to continue without it. - - -! LaTeX Error: Missing \begin{document}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.10 v - oid @\FuncDecl{shmem\_atomic\_compare\_swap\_nbi}@(TYPE *fetch, TYPE *... - -You're in trouble here. Try typing to proceed. -If that doesn't work, type X to quit. - -Missing character: There is no v in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no @ in font nullfont! -! Undefined control sequence. 
-l.10 void @\FuncDecl - {shmem\_atomic\_compare\_swap\_nbi}@(TYPE *fetch, TYPE *... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no s in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no @ in font nullfont! -Missing character: There is no ( in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! 
-Missing character: There is no * in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no * in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no v in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! 
-Missing character: There is no t in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no ) in font nullfont! -Missing character: There is no ; in font nullfont! -Missing character: There is no v in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no @ in font nullfont! -! Undefined control sequence. -l.11 void @\FuncDecl - {shmem\_atomic\_compare\_swap\_nbi}@(shmem_ctx_t ctx, TY... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no s in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no a in font nullfont! 
-Missing character: There is no p in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no @ in font nullfont! -Missing character: There is no ( in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -! Missing $ inserted. - - $ -l.11 ...shmem\_atomic\_compare\_swap\_nbi}@(shmem_ - ctx_t ctx, TYPE *fetch, TY... -I've inserted a begin-math/end-math symbol since I think -you left one out. Proceed, with fingers crossed. - -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <7> on input line 11. -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <5> on input line 11. - -! LaTeX Error: \begin{document} ended by \end{C11synopsis}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.12 \end{C11synopsis} - -Your command was ignored. -Type I to replace it with another command, -or to continue without it. - -! Missing $ inserted. - - $ -l.12 \end{C11synopsis} - -I've inserted something that you may have forgotten. -(See the above.) -With luck, this will get me unwedged. But if you -really didn't forget anything, try typing `2' now; then -my insertion and my current dilemma will both disappear. - -Missing character: There is no w in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. 
-l.13 where \TYPE - {} is one of the standard \ac{AMO} types specified by -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no d in font nullfont! -! Undefined control sequence. -l.13 where \TYPE{} is one of the standard \ac - {AMO} types specified by -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no A in font nullfont! -Missing character: There is no M in font nullfont! -Missing character: There is no O in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no p in font nullfont! 
-Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. - ...ference `stdamotypes' on page \thepage - \space undefined\on@line . -l.14 Table~\ref{stdamotypes} - . -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - - -LaTeX Warning: Reference `stdamotypes' on page undefined on input line 14. - -Missing character: There is no . in font nullfont! 
- -Overfull \hbox (20.0pt too wide) in paragraph at lines 10--15 -[] - [] - - -Overfull \hbox (71.04713pt too wide) in paragraph at lines 10--15 -[][][][]$[]\OML/cmm/m/it/10 tx[]ctx; TYPE \OMS/cmsy/m/n/10  - [] - - -Overfull \hbox (64.801pt too wide) in paragraph at lines 10--15 -\OML/cmm/m/it/10 fetch; TYPE \OMS/cmsy/m/n/10  - [] - - -Overfull \hbox (167.39667pt too wide) in paragraph at lines 10--15 -\OML/cmm/m/it/10 dest; TYPEcond; TYPEvalue; intpe\OT1/cmr/m/n/10 );$ - [] - - -Overfull \hbox (10.86105pt too wide) in paragraph at lines 10--15 -[] - [] - - -! LaTeX Error: Environment Csynopsis undefined. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.16 \begin{Csynopsis} - -Your command was ignored. -Type I to replace it with another command, -or to continue without it. - - -! LaTeX Error: Missing \begin{document}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.17 v - oid @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_compare\_swap\_nbi... - -You're in trouble here. Try typing to proceed. -If that doesn't work, type X to quit. - -Missing character: There is no v in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no @ in font nullfont! -! Undefined control sequence. -l.17 void @\FuncDecl - {shmem\_\FuncParam{TYPENAME}\_atomic\_compare\_swap\_nbi... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no s in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! 
-Missing character: There is no m in font nullfont! -! Undefined control sequence. -l.17 void @\FuncDecl{shmem\_\FuncParam - {TYPENAME}\_atomic\_compare\_swap\_nbi... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no N in font nullfont! -Missing character: There is no A in font nullfont! -Missing character: There is no M in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no @ in font nullfont! -Missing character: There is no ( in font nullfont! 
-Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no * in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no * in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no v in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no u in font nullfont! 
-Missing character: There is no e in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no ) in font nullfont! -Missing character: There is no ; in font nullfont! -Missing character: There is no v in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no @ in font nullfont! -! Undefined control sequence. -l.18 void @\FuncDecl - {shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_compare\_swap... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no s in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no x in font nullfont! -! Undefined control sequence. -l.18 void @\FuncDecl{shmem\_ctx\_\FuncParam - {TYPENAME}\_atomic\_compare\_swap... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no T in font nullfont! 
-Missing character: There is no Y in font nullfont! -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no N in font nullfont! -Missing character: There is no A in font nullfont! -Missing character: There is no M in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no @ in font nullfont! -Missing character: There is no ( in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -! Missing $ inserted. - - $ -l.18 ...NAME}\_atomic\_compare\_swap\_nbi}@(shmem_ - ctx_t ctx, TYPE *fetch, TY... -I've inserted a begin-math/end-math symbol since I think -you left one out. Proceed, with fingers crossed. - - -! 
LaTeX Error: \begin{document} ended by \end{Csynopsis}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.19 \end{Csynopsis} - -Your command was ignored. -Type I to replace it with another command, -or to continue without it. - -! Missing $ inserted. - - $ -l.19 \end{Csynopsis} - -I've inserted something that you may have forgotten. -(See the above.) -With luck, this will get me unwedged. But if you -really didn't forget anything, try typing `2' now; then -my insertion and my current dilemma will both disappear. - -Missing character: There is no w in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.20 where \TYPE - {} is one of the standard \ac{AMO} types and has a correspon... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! 
-Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no d in font nullfont! -! Undefined control sequence. -l.20 where \TYPE{} is one of the standard \ac - {AMO} types and has a correspon... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no A in font nullfont! -Missing character: There is no M in font nullfont! -Missing character: There is no O in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no g in font nullfont! -! 
Undefined control sequence. -l.21 \TYPENAME - {} specified by Table~\ref{stdamotypes}. -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no s in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. - ...ference `stdamotypes' on page \thepage - \space undefined\on@line . -l.21 ...AME{} specified by Table~\ref{stdamotypes} - . -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - - -LaTeX Warning: Reference `stdamotypes' on page undefined on input line 21. - -Missing character: There is no . in font nullfont! 
- -Overfull \hbox (20.0pt too wide) in paragraph at lines 17--22 -[] - [] - - -Overfull \hbox (71.04713pt too wide) in paragraph at lines 17--22 -[][][][][][]$[]\OML/cmm/m/it/10 tx[]ctx; TYPE \OMS/cmsy/m/n/10  - [] - - -Overfull \hbox (64.801pt too wide) in paragraph at lines 17--22 -\OML/cmm/m/it/10 fetch; TYPE \OMS/cmsy/m/n/10  - [] - - -Overfull \hbox (167.39667pt too wide) in paragraph at lines 17--22 -\OML/cmm/m/it/10 dest; TYPEcond; TYPEvalue; intpe\OT1/cmr/m/n/10 );$ - [] - - -Overfull \hbox (10.86105pt too wide) in paragraph at lines 17--22 -[] - [] - - -! LaTeX Error: Environment apiarguments undefined. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.23 \begin{apiarguments} - -Your command was ignored. -Type I to replace it with another command, -or to continue without it. - -! Undefined control sequence. -l.24 \apiargument - {IN}{ctx}{A context handle specifying the context on wh... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - - -! LaTeX Error: Missing \begin{document}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.24 \apiargument{I - N}{ctx}{A context handle specifying the context on wh... - -You're in trouble here. Try typing to proceed. -If that doesn't work, type X to quit. - -Missing character: There is no I in font nullfont! -Missing character: There is no N in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no x in font nullfont! -Missing character: There is no A in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! 
-Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no x in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no g in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no x in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no t in font nullfont! 
-Missing character: There is no o in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no . in font nullfont! -Missing character: There is no W in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no g in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! 
-Missing character: There is no s in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no v in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! 
-Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no x in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no . in font nullfont! -! Undefined control sequence. -l.27 \apiargument - {OUT}{fetch}{Local data object to be updated.} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no O in font nullfont! -Missing character: There is no U in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no L in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! 
-Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no j in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no . in font nullfont! -! Undefined control sequence. -l.28 \apiargument - {OUT}{dest}{The remotely accessible data object to be u... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no O in font nullfont! -Missing character: There is no U in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no h in font nullfont! 
-Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no j in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! 
-Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.29 the remote \ac - {PE}. } -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no . in font nullfont! -! Undefined control sequence. -l.30 \apiargument - {IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no I in font nullfont! -Missing character: There is no N in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -! Undefined control sequence. 
-l.30 \apiargument{IN}{cond}{\VAR - {cond} is compared to the remote \VAR{dest} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.30 ...{\VAR{cond} is compared to the remote \VAR - {dest} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. 
- -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no v in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no . in font nullfont! -Missing character: There is no I in font nullfont! -Missing character: There is no f in font nullfont! -! Undefined control sequence. -l.31 value. If \VAR - {cond} and the remote \VAR{dest} are equal, then ... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.31 ... value. If \VAR{cond} and the remote \VAR - {dest} are equal, then \VA... 
-The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no q in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -! Undefined control sequence. -l.31 ...the remote \VAR{dest} are equal, then \VAR - {value} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no v in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no w in font nullfont! 
-Missing character: There is no a in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.32 is swapped into the remote \VAR - {dest}; otherwise, the remote \V... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no ; in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! 
-Missing character: There is no e in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.32 ...ote \VAR{dest}; otherwise, the remote \VAR - {dest} is -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no g in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no . in font nullfont! -! Undefined control sequence. -l.33 unchanged. \VAR - {cond} must be of the same data type as \VAR{des... -The control sequence at the end of the top line -of your error message was never \def'ed. 
If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no s in font nullfont! -! Undefined control sequence. -l.33 ...ond} must be of the same data type as \VAR - {dest}.} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). 
Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no . in font nullfont! -! Undefined control sequence. -l.34 \apiargument - {IN}{value}{The value to be atomically written to the r... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no I in font nullfont! -Missing character: There is no N in font nullfont! -Missing character: There is no v in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no v in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no i in font nullfont! 
-Missing character: There is no c in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.35 \ac - {PE}. \VAR{value} must be the same data type as \VAR{dest}.} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no . in font nullfont! -! Undefined control sequence. -l.35 \ac{PE}. \VAR - {value} must be the same data type as \VAR{dest}.} -The control sequence at the end of the top line -of your error message was never \def'ed. 
If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no v in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no s in font nullfont! -! Undefined control sequence. -l.35 ...{value} must be the same data type as \VAR - {dest}.} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. 
- -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no . in font nullfont! -! Undefined control sequence. -l.36 \apiargument - {IN}{pe}{An integer that indicates the \ac{PE} number u... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no I in font nullfont! -Missing character: There is no N in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no A in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no g in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! 
-Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.36 ...{IN}{pe}{An integer that indicates the \ac - {PE} number upon which -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -! Undefined control sequence. -l.37 \VAR - {dest} is to be updated.} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! 
-Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no . in font nullfont! - -! LaTeX Error: \begin{document} ended by \end{apiarguments}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.38 \end{apiarguments} - -Your command was ignored. -Type I to replace it with another command, -or to continue without it. - - -Overfull \hbox (20.0pt too wide) in paragraph at lines 24--39 -[] - [] - -! Undefined control sequence. -l.40 \apidescription - { -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - - -! LaTeX Error: Missing \begin{document}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.41 T - he nonblocking conditional swap routines conditionally update \VAR... - -You're in trouble here. Try typing to proceed. -If that doesn't work, type X to quit. - -Missing character: There is no T in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no o in font nullfont! 
-Missing character: There is no n in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no k in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no g in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no w in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no t in font nullfont! 
-Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.41 ...al swap routines conditionally update \VAR - {dest} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no j in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! 
-Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -! Undefined control sequence. -l.42 data object on the specified \ac - {PE} and fetches the prior contents... -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no c in font nullfont! 
-Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no j in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.43 data object into the \VAR - {fetch} local data object as a -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no f in font nullfont! -Missing character: There is no e in font nullfont! 
-Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no j in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no g in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no a in font nullfont! 
-Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no . in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no g in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! 
-Missing character: There is no o in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no . in font nullfont! -Missing character: There is no T in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no p in font nullfont! 
-Missing character: There is no l in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no q in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -! Undefined control sequence. -l.46 \FUNC - {shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, prior -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no s in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! 
-Missing character: There is no q in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no . in font nullfont! -Missing character: There is no A in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no f in font nullfont! -! Undefined control sequence. -l.46 ...{shmem\_quiet}. At the completion of \FUNC - {shmem\_quiet}, prior -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no s in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no q in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no i in font nullfont! 
-Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no , in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.47 contents of the \VAR - {dest} data object is atomically fetched into -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! 
-Missing character: There is no j in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no f in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -! Undefined control sequence. -l.48 \VAR - {fetch} local data object and the contents of \VAR{value} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no f in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no c in font nullfont! 
-Missing character: There is no h in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no b in font nullfont! -Missing character: There is no j in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no f in font nullfont! -! Undefined control sequence. -l.48 ...local data object and the contents of \VAR - {value} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). 
Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no v in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no y in font nullfont! -Missing character: There is no u in font nullfont! -Missing character: There is no p in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no d in font nullfont! -Missing character: There is no i in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -! Undefined control sequence. -l.49 is conditionally updated into \VAR - {dest} on to the remote \ac{PE}. -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). 
Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no d in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no s in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no h in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no r in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no m in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no t in font nullfont! -Missing character: There is no e in font nullfont! -! Undefined control sequence. -l.49 ...dated into \VAR{dest} on to the remote \ac - {PE}. -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - -Missing character: There is no P in font nullfont! -Missing character: There is no E in font nullfont! -Missing character: There is no . in font nullfont! - -Overfull \hbox (20.0pt too wide) in paragraph at lines 41--51 -[] - [] - -! Undefined control sequence. -l.52 \apireturnvalues - { -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - - -! LaTeX Error: Missing \begin{document}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. 
- ... - -l.53 N - one. -You're in trouble here. Try typing to proceed. -If that doesn't work, type X to quit. - -Missing character: There is no N in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no . in font nullfont! - -Overfull \hbox (20.0pt too wide) in paragraph at lines 53--55 -[] - [] - -! Undefined control sequence. -l.56 \apinotes - { -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - - -! LaTeX Error: Missing \begin{document}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.57 N - one. -You're in trouble here. Try typing to proceed. -If that doesn't work, type X to quit. - -Missing character: There is no N in font nullfont! -Missing character: There is no o in font nullfont! -Missing character: There is no n in font nullfont! -Missing character: There is no e in font nullfont! -Missing character: There is no . in font nullfont! - -Overfull \hbox (20.0pt too wide) in paragraph at lines 57--59 -[] - [] - - -! LaTeX Error: \begin{document} ended by \end{apidefinition}. - -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.60 \end{apidefinition} - -Your command was ignored. -Type I to replace it with another command, -or to continue without it. - -! Undefined control sequence. -l.61 \color - {Black} -The control sequence at the end of the top line -of your error message was never \def'ed. If you have -misspelled it (e.g., `\hobx'), type `I' and the correct -spelling (e.g., `I\hbox'). Otherwise just continue, -and I'll forget about whatever was undefined. - - -! LaTeX Error: Missing \begin{document}. 
- -See the LaTeX manual or LaTeX Companion for explanation. -Type H for immediate help. - ... - -l.61 \color{B - lack} -You're in trouble here. Try typing to proceed. -If that doesn't work, type X to quit. - -Missing character: There is no B in font nullfont! -Missing character: There is no l in font nullfont! -Missing character: There is no a in font nullfont! -Missing character: There is no c in font nullfont! -Missing character: There is no k in font nullfont! -) -! Emergency stop. -<*> ...i/content/shmem_atomic_compare_swap_nbi.tex - -*** (job aborted, no legal \end found) - - -Here is how much of TeX's memory you used: - 22 strings out of 493634 - 624 string characters out of 3135239 - 53604 words of memory out of 3000000 - 3505 multiletter control sequences out of 15000+200000 - 3948 words of font info for 15 fonts, out of 3000000 for 9000 - 1025 hyphenation exceptions out of 8191 - 18i,2n,14p,265b,95s stack positions out of 5000i,500n,10000p,200000b,50000s -! ==> Fatal error occurred, no output PDF file produced! From 790d76295966de5253e2dfd1675d96d63f8e2460 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:03:08 -0500 Subject: [PATCH 127/319] Update NBI atomic fetch texts --- content/shmem_atomic_fetch_nbi.tex | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex index 98e5f0651..e5dad654a 100644 --- a/content/shmem_atomic_fetch_nbi.tex +++ b/content/shmem_atomic_fetch_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - The nonblocking atomic fetch routine provides an option for atomically + The nonblocking atomic fetch routine provides a method for atomically fetching the value of a remote data object. } @@ -23,22 +23,23 @@ \begin{apiarguments} \apiargument{IN}{ctx}{A context handle specifying the context on which to - perform the operation. 
When this argument is not provided, the - operation is performed on the default context.} + perform the operation. When this argument is not provided, the operation is + performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} - \apiargument{IN}{source}{The remotely accessible data object to be fetched from - the remote \ac{PE}.} + \apiargument{IN}{source}{The remotely accessible data object to be fetched + from the remote \ac{PE}.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number from which \VAR{source} is to be fetched.} \end{apiarguments} \apidescription{ - \FUNC{shmem\_atomic\_fetch\_nbi} performs a nonblocking atomic fetch - operation. This routine returns after posting the operation. The operation - is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At - the completion of \FUNC{shmem\_quiet}, contents of the \source{} data object - from \ac{PE} is atomically fetched into \VAR{fetch} local data object. + \FUNC{shmem\_atomic\_fetch\_nbi} performs a nonblocking fetching of a value + atomically from a remote data object. This routine returns after posting the + operation. The operation is considered complete after a subsequent call to + \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, contents of + the \source{} data object from \ac{PE} is atomically fetched into + \VAR{fetch} local data object. 
} \apireturnvalues{ From d6e5dcf5b0381c5a5b3568280c78b6f86ab02be0 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:03:38 -0500 Subject: [PATCH 128/319] Update NBI atomic compare swap texts --- content/shmem_atomic_compare_swap_nbi.tex | 48 +++++++++++------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex index c7569429d..dbca8b7c8 100644 --- a/content/shmem_atomic_compare_swap_nbi.tex +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -1,7 +1,7 @@ \color{Green} \apisummary{ - This nonblocking routine performs an atomic conditional swap on a remote - data object. + The nonblocking atomic routine provides a method for performing an atomic + conditional swap on a remote data object. } \begin{apidefinition} @@ -21,32 +21,32 @@ \TYPENAME{} specified by Table~\ref{stdamotypes}. \begin{apiarguments} - \apiargument{IN}{ctx}{A context handle specifying the context on which to - perform the operation. When this argument is not provided, the - operation is performed on the default context.} - \apiargument{OUT}{fetch}{Local data object to be updated.} - \apiargument{OUT}{dest}{The remotely accessible data object to be updated on - the remote \ac{PE}. } - \apiargument{IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} - value. If \VAR{cond} and the remote \VAR{dest} are equal, then \VAR{value} - is swapped into the remote \VAR{dest}; otherwise, the remote \VAR{dest} is - unchanged.} - \apiargument{IN}{value}{The value to be atomically written to the remote - \ac{PE}. } - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which - \VAR{dest} is to be updated.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. 
When this argument is not provided, the operation + is performed on the default context.} + \apiargument{OUT}{fetch}{Local data object to be updated.} + \apiargument{OUT}{dest}{The remotely accessible data object to be updated on + the remote \ac{PE}. } + \apiargument{IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} + value. If \VAR{cond} and the remote \VAR{dest} are equal, then \VAR{value} + is swapped into the remote \VAR{dest}; otherwise, the remote \VAR{dest} is + unchanged.} + \apiargument{IN}{value}{The value to be atomically written to the remote + \ac{PE}. } + \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which + \VAR{dest} is to be updated.} \end{apiarguments} \apidescription{ - The nonblocking conditional swap routines conditionally update \VAR{dest} + The nonblocking conditional swap routines conditionally update a \VAR{dest} data object on the specified \ac{PE} and fetches the prior contents of the - data object into the \VAR{fetch} local data object as a - single atomic operation. This routine returns after posting the operation. - The operation is considered complete after a subsequent call to - \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, prior - contents of the \VAR{dest} data object is atomically fetched into - \VAR{fetch} local data object and the contents of \VAR{value} - is conditionally updated into \VAR{dest} on to the remote \ac{PE}. + \VAR{dest} data object into the \VAR{fetch} local data object as one atomic + operation. This routine returns after posting the operation. The operation + is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the + completion of \FUNC{shmem\_quiet}, prior contents of the \VAR{dest} data + object is atomically fetched into \VAR{fetch} local data object and the + contents of \VAR{value} is conditionally updated into \VAR{dest} on the + remote \ac{PE}. 
} \apireturnvalues{ From 4328b3142d291b3d55109d6208936b04dc6a1cf4 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:04:13 -0500 Subject: [PATCH 129/319] Update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 721f3a3f6..3415f4bba 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ main_spec.log main_spec.out main_spec.pdf main_spec.toc +*.log From 6c39d4ef287355e076f4e6c479699ffca760f1fb Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:07:08 -0500 Subject: [PATCH 130/319] Update NBI atomic fetch texts --- content/shmem_atomic_fetch_nbi.tex | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex index e5dad654a..e8ffa160d 100644 --- a/content/shmem_atomic_fetch_nbi.tex +++ b/content/shmem_atomic_fetch_nbi.tex @@ -34,12 +34,12 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_atomic\_fetch\_nbi} performs a nonblocking fetching of a value - atomically from a remote data object. This routine returns after posting the - operation. The operation is considered complete after a subsequent call to - \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, contents of - the \source{} data object from \ac{PE} is atomically fetched into - \VAR{fetch} local data object. + The nonblocking atomic fetch routine performs a nonblocking fetching of a + value atomically from a remote data object. This routine returns after + posting the operation. The operation is considered complete after a + subsequent call to \FUNC{shmem\_quiet}. At the completion of + \FUNC{shmem\_quiet}, contents of the \source{} data object from \ac{PE} is + atomically fetched into \VAR{fetch} local data object. 
} \apireturnvalues{ From 7d0d87803b1aff20d45643a6008c856ae31c07fc Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:13:31 -0500 Subject: [PATCH 131/319] Update NBI atomic fetch add texts --- content/shmem_atomic_fetch_add_nbi.tex | 29 +++++++++++++------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index 7c43ec024..4fbf2872a 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -1,7 +1,7 @@ \color{Green} \apisummary{ - This nonblocking routine performs an atomic fetch-and-add operation on a - remote data object. + The nonblocking atomic routine performs a atomic fetch-and-add operation on + a remote data object. } \begin{apidefinition} @@ -22,15 +22,15 @@ \begin{apiarguments} -\apiargument{IN}{ctx}{A context handle specifying the context on which to - perform the operation. When this argument is not provided, the operation - is performed on the default context.} -\apiargument{OUT}{fetch}{Local data object to be updated.} -\apiargument{OUT}{dest}{The remotely accessible data object to be updated on - the remote \ac{PE}.} -\apiargument{IN}{value}{The value to be atomically added to \VAR{dest}.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \VAR{dest} is to be updated.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. 
When this argument is not provided, the operation + is performed on the default context.} + \apiargument{OUT}{fetch}{Local data object to be updated.} + \apiargument{OUT}{dest}{The remotely accessible data object to be updated on + the remote \ac{PE}.} + \apiargument{IN}{value}{The value to be atomically added to \VAR{dest}.} + \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which + \VAR{dest} is to be updated.} \end{apiarguments} @@ -41,10 +41,9 @@ possibility of another atomic operation on the \VAR{dest} between the time of the fetch and the update. This routine returns after posting the operation. The operation is considered complete after a subsequent call to - \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, these - routines add \VAR{value} to \VAR{dest} on \VAR{pe} and fetch the previous - contents of \VAR{dest} as an atomic operation into the \VAR{fetch} local - data object. + \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, \VAR{value} + is added to \VAR{dest} on \VAR{pe} and the previous contents of \VAR{dest} + are fetched into the \VAR{fetch} local data object as one atomic operation. } \apireturnvalues{ From bcc703e84b351055b14564b037fd64fb3314c093 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:18:24 -0500 Subject: [PATCH 132/319] Update NBI atomic fetch add texts --- content/shmem_atomic_fetch_add_nbi.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index 4fbf2872a..1d54ca8b5 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -42,7 +42,7 @@ of the fetch and the update. This routine returns after posting the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. 
At the completion of \FUNC{shmem\_quiet}, \VAR{value} - is added to \VAR{dest} on \VAR{pe} and the previous contents of \VAR{dest} + is added to \VAR{dest} on \VAR{pe} and the prior contents of \VAR{dest} are fetched into the \VAR{fetch} local data object as one atomic operation. } From a00f0de8521619327597c8f610ab55dbd2036e59 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:24:19 -0500 Subject: [PATCH 133/319] Update NBI atomic swap operation --- content/shmem_atomic_swap_nbi.tex | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex index d45c2aca7..620b1feb3 100644 --- a/content/shmem_atomic_swap_nbi.tex +++ b/content/shmem_atomic_swap_nbi.tex @@ -1,6 +1,7 @@ \color{Green} \apisummary{ - This nonblocking operation performs an atomic swap to a remote data object. + This nonblocking atomic operation performs an atomic swap to a remote data + object. } \begin{apidefinition} @@ -9,13 +10,15 @@ void @\FuncDecl{shmem\_atomic\_swap\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); void @\FuncDecl{shmem\_atomic\_swap\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); \end{C11synopsis} -where \TYPE{} is one of the extended \ac{AMO} types specified by Table \ref{extamotypes}. +where \TYPE{} is one of the extended \ac{AMO} types specified by +Table~\ref{extamotypes}. \begin{Csynopsis} void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_swap\_nbi}@(TYPE *fetch, TYPE *dest, TYPE value, int pe); void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_swap\_nbi}@(shmem_ctx_t ctx, TYPE *fetch, TYPE *dest, TYPE value, int pe); \end{Csynopsis} -where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{extamotypes}. +where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{extamotypes}. 
\begin{apiarguments} \apiargument{IN}{ctx}{A context handle specifying the context on which to @@ -26,17 +29,17 @@ updated on the remote \ac{PE}.} \apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}.} - \apiargument{IN}{pe}{ An integer that indicates the \ac{PE} number on which + \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to be updated.} \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_swap\_nbi} performs an atomic swap - operation. This routine returns after posting the operation. The operation - is considered complete after a subsequent call to \FUNC{shmem\_quiet}. - At the completion of \FUNC{shmem\_quiet}, it writes \VAR{value} into - \dest{} on \ac{PE} and fetches the contents of \dest{} as an atomic - operation into \VAR{fetch} local data object. + The nonblocking \FUNC{shmem\_atomic\_swap\_nbi} routines perform an atomic + swap operation. This routine returns after posting the operation. The + operation is considered complete after a subsequent call to + \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, it writes + \VAR{value} into \dest{} on \ac{PE} \VAR{pe} and fetches the prior contents of + \dest{} into the \VAR{fetch} local data object as one atomic operation.
} \apireturnvalues{ From 356d86ab3d47af5b24fa5e8fe1bebbeb2718d36a Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:29:21 -0500 Subject: [PATCH 134/319] Update NBI fetch add and inc texts --- content/shmem_atomic_fetch_add_nbi.tex | 2 +- content/shmem_atomic_fetch_inc_nbi.tex | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index 1d54ca8b5..ff59fedfc 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - The nonblocking atomic routine performs a atomic fetch-and-add operation on + The nonblocking atomic routine performs an atomic fetch-and-add operation on a remote data object. } diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index 471a4c4d5..869fe59f3 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -1,7 +1,7 @@ \color{Green} \apisummary{ - This nonblocking routine performs an atomic fetch-and-increment operation - on a remote data object. + This nonblocking atomic routine performs an atomic fetch-and-increment + operation on a remote data object. } \begin{apidefinition} @@ -26,8 +26,8 @@ perform the operation. When this argument is not provided, the operation is performed on the default context.} \apiargument{OUT}{fetch}{Local data object to be updated.} -\apiargument{OUT}{dest}{The remotely accessible data object to be updated - on the remote \ac{PE}.} +\apiargument{OUT}{dest}{The remotely accessible data object to be updated on the + remote \ac{PE}.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to be updated.} @@ -35,12 +35,12 @@ \apidescription{ - These nonblocking routines perform an atomic fetch-and-increment operation. - This routine returns after posting the operation. 
The operation is considered - complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, the \dest{} on \ac{PE} \VAR{pe} is increased by one and - the routine fetches the previous contents of \dest{} as an atomic operation - into the \VAR{fetch} local data object. + These nonblocking \FUNC{shmem\_atomic\_fetch\_inc\_nbi} routines perform an + atomic fetch-and-increment operation. This routine returns after posting the + operation. The operation is considered complete after a subsequent call to + \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the \dest{} on + \ac{PE} \VAR{pe} is increased by one and the previous contents of \dest{} are + fetched into the \VAR{fetch} local data object as one atomic operation. } \apireturnvalues{ From 435bfa0031aa6a4b6a670fc39c481a45cbea119b Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 11 Mar 2019 07:37:24 -0500 Subject: [PATCH 135/319] Update NBI atomic bitwise routines texts --- content/shmem_atomic_fetch_and_nbi.tex | 22 +++++++++++----------- content/shmem_atomic_fetch_or_nbi.tex | 22 +++++++++++----------- content/shmem_atomic_fetch_xor_nbi.tex | 20 ++++++++++---------- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index eda95a3a0..49bc81601 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -1,7 +1,7 @@ \color{Green} \apisummary{ - This nonblocking operation atomically performs a fetching bitwise AND - operation on a remote data object. + This nonblocking atomic operation performs an atomic fetching bitwise AND + operation on a remote data object. 
} \begin{apidefinition} @@ -35,15 +35,15 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines atomically - performs a fetching bitwise AND on the remotely accessible data object pointed - to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine - returns after posting the operation. The operation is considered complete - after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines atomically performs a fetching bitwise - AND on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the - previous contents of \VAR{dest} as an atomic operation into the \VAR{fetch} - local data object. + The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines perform an + atomic fetching bitwise AND on the remotely accessible data object pointed + to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine + returns after posting the operation. The operation is considered complete + after a subsequent call to \FUNC{shmem\_quiet}. At the completion of + \FUNC{shmem\_quiet}, these routines perform a fetching bitwise AND on + \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior + contents of \VAR{dest} into the \VAR{fetch} local data object as one atomic + operation. } \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index 52617bbc5..d66fdf8ca 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -1,7 +1,7 @@ \color{Green} \apisummary{ - This nonblocking operation atomically performs a fetching bitwise OR - operation on a remote data object. + This nonblocking atomic operation performs an atomic fetching bitwise OR + operation on a remote data object.
} \begin{apidefinition} @@ -35,15 +35,15 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines atomically - performs a fetching bitwise OR on the remotely accessible data object pointed - to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine - returns after posting the operation. The operation is considered complete - after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines atomically performs a fetching bitwise - OR on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the - previous contents of \VAR{dest} as an atomic operation into the \VAR{fetch} - local data object. + The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines perform an + atomic fetching bitwise OR on the remotely accessible data object pointed + to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine + returns after posting the operation. The operation is considered complete + after a subsequent call to \FUNC{shmem\_quiet}. At the completion of + \FUNC{shmem\_quiet}, these routines perform a fetching bitwise OR on + \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior + contents of \VAR{dest} into the \VAR{fetch} local data object as one atomic + operation. } \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index 089b58113..b0a2ba4fb 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking operation atomically performs a fetching bitwise XOR + This nonblocking atomic operation performs an atomic fetching bitwise XOR operation on a remote data object.
} @@ -35,15 +35,15 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines atomically - performs a fetching bitwise XOR on the remotely accessible data object pointed - to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine - returns after posting the operation. The operation is considered complete - after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines atomically performs a fetching bitwise - XOR on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the - previous contents of \VAR{dest} as an atomic operation into the \VAR{fetch} - local data object. + The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines perform an + atomic fetching bitwise XOR on the remotely accessible data object pointed + to by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine + returns after posting the operation. The operation is considered complete + after a subsequent call to \FUNC{shmem\_quiet}. At the completion of + \FUNC{shmem\_quiet}, these routines perform a fetching bitwise XOR on + \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior + contents of \VAR{dest} into the \VAR{fetch} local data object as one atomic + operation.
} \apireturnvalues{ From 7ae05a21d7e15267e70f383b7c4d59742d561faf Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 13 Mar 2019 14:45:05 -0500 Subject: [PATCH 136/319] Remove team configuration hints --- content/library_constants.tex | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index 5e38a9aa4..b89b85ddd 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -50,16 +50,6 @@ \tabularnewline \hline %% \color{Green} -\LibConstDecl{SHMEM\_TEAM\_NUM\_CONTEXTS\_HINT} & -\color{Green} -The bitwise flag which specifies that a team creation routine should use the -\VAR{num\_contexts} member of the provided -\CTYPE{shmem\_team\_config\_t} configuration parameter as a hint. -See Sections~\ref{subsec:shmem_team_config_t} and -\ref{subsec:shmem_team_split_strided} for more detail about its use. -\tabularnewline \hline -%% -\color{Green} \LibConstDecl{SHMEM\_TEAM\_NULL} & \color{Green} Predefined constant that can be compared against handles of type From 8b2d6ef070e70a961c8d3f4400ca580077119094 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 13 Mar 2019 14:45:57 -0500 Subject: [PATCH 137/319] update team config to indicate num_contexts is request --- content/shmem_team_config_t.tex | 49 ++++++++++++--------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex index f4cf7b438..d8dcddd59 100644 --- a/content/shmem_team_config_t.tex +++ b/content/shmem_team_config_t.tex @@ -13,34 +13,27 @@ \vspace{1.0em} \apidescription{ - A team configuration argument acts as both input and output to the + A team configuration argument acts as an input \FUNC{shmem\_team\_split\_*} routines. - As an input, it specifies the requested capabilities of the team to be - created. Capabilities can be requested as either hints or requirements. 
+ It specifies the requested capabilities of the team to be + created. - If given configuration parameter input is a requirement, and the team creation - cannot provide this capability, then team creation fails. - If a given configuration parameter input is a hint, and the library - succeeds in creating the team, the parameter will be updated to - the actual configuration that the library was able to provide - during team creation. - - The \VAR{num\_contexts} member specifies the total number of contexts - created from this team that can simultaneously exist. These contexts - may be created in any number of threads. A program - may destroy any number of contexts made from this team and make - any number of new ones so long as the total existing at any point - remains less than \VAR{num\_contexts}. Any contexts created from this - team must be destroyed before the team is destroyed, or the - behavior is undefined. + The \VAR{num\_contexts} member specifies the total number of simultaneously + existing contexts that the program requests to create from this team. + These contexts may be created in any number of threads. Successful + creation of a team configured with \VAR{num\_contexts} of $N$ means + that the implementation will make a best effort to reserve enough + resources to allow the team to have $N$ contexts created from the team + in existence at any given time. It is not a guarantee that $N$ + calls to \FUNC{shmem\_team\_create\_ctx} will succeed. See Section~\ref{sec:ctx} for more on communication contexts and Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. When using the configuration structure to create teams, a mask parameter - controls which fields to use and whether they are hints or requirements. - Any configuration parameter that is not indicated in the mask will be - ignored. - So, a program does not have to set all fields in the config struct; + controls which fields to use.
+ Any configuration parameter value that is not indicated in the mask will be + ignored, and the default value will be used instead. + Therefore, a program does not have to set all fields in the config struct; only those for which it does not want the default values. A configuration mask value is created by combining individual field @@ -52,19 +45,13 @@ \VAR{num\_contexts} member of the configuration parameter \VAR{config} as a requirement. } - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_CONTEXTS\_HINT}}{ - The team should be created using the value of the - \VAR{num\_contexts} member of the configuration parameter - \VAR{config} as a hint. - } } - If a program creates a mask using both the requirement and hint flag - for a given parameter, the behavior is undefined. - A configuration mask value of \CONST{0} indicates that the team should be created with the default values for all configuration - parameters, as follows: + parameters. + + The default values for configuration parameters are: { \apitablerow{num\_contexts = \CONST{0}}{ From 8c092f8a7b1b5964a3ab757550c1fb704d1a457f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 13 Mar 2019 14:46:35 -0500 Subject: [PATCH 138/319] remove upper bound of num_contexts on team create context --- content/shmem_team_create_ctx.tex | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/content/shmem_team_create_ctx.tex b/content/shmem_team_create_ctx.tex index e068440e8..3bfa0c706 100644 --- a/content/shmem_team_create_ctx.tex +++ b/content/shmem_team_create_ctx.tex @@ -26,23 +26,26 @@ the same arguments and provides all the same return conditions as the \FUNC{shmem\_ctx\_create} routine. 
- The \FUNC{shmem\_team\_create\_ctx} routine may be called any number of times, - but the total number of simultaneously existing contexts created from a team - must be no more than were specified by the \VAR{num\_contexts} member of the - \CTYPE{shmem\_team\_config\_t} configuration parameters that were specified - when the team was created. Calling \FUNC{shmem\_team\_create\_ctx} on a - team for which the maximum number of contexts currently exists results in a - failure with nonzero return code. + The \FUNC{shmem\_team\_create\_ctx} routine may be called any number of times + to create multiple simultaneously existing contexts for the team. Programs + should request the total number of simultaneous contexts to be created from + the team during team creation. See Section~\ref{subsec:shmem_team_config_t} + for more information on how to request contexts during team creation. + + A call to \FUNC{shmem\_team\_create\_ctx} on a team may fail, regardless + of the configuration request for contexts, if the implementation is unable + to create a context at the time when \FUNC{shmem\_team\_create\_ctx} is + called. All explicitly created resources associated with a team must be destroyed before the \FUNC{shmem\_team\_destroy} routine is called. If a context returned from \FUNC{shmem\_team\_create\_ctx} is not explicitly destroyed before the team is destroyed, behavior is undefined. - %% All \openshmem routines that operate on this context will do so with - %% respect to the associated \ac{PE} team. - %% That is, all point-to-point routines operating on this context will use - %% team-relative \ac{PE} numbering. + All \openshmem routines that operate on this context will do so with + respect to the associated \ac{PE} team. + That is, all point-to-point routines operating on this context will use + team-relative \ac{PE} numbering. 
} \apireturnvalues{ From 81ee0a20b5805eba1263a5a7e4c590f729fc7991 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 13 Mar 2019 15:50:44 -0500 Subject: [PATCH 139/319] Fix typo in barrier all regarding deprecation --- content/shmem_barrier_all.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_barrier_all.tex b/content/shmem_barrier_all.tex index 9f405069f..548152d44 100644 --- a/content/shmem_barrier_all.tex +++ b/content/shmem_barrier_all.tex @@ -36,7 +36,7 @@ \FUNC{shmem\_put\_nbi}, and \FUNC{shmem\_get\_nbi}. {\color{Green} - \FUNC{shmem\_barrier} has been deprecated in favor of the equivalent + \FUNC{shmem\_barrier\_all} has been deprecated in favor of the equivalent call to \FUNC{shmem\_quiet} followed by a call to \FUNC{shmem\_sync(SHMEM\_TEAM\_WORLD)}. } From 7f7ed9434aaac43667b6e0766071bf831cb80aaf Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 14 Mar 2019 15:37:55 -0500 Subject: [PATCH 140/319] Added unsigned types to reduction operations --- content/shmem_reductions.tex | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 751f258d5..01fc82ed8 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -12,10 +12,15 @@ \begin{tabular}{|l|l|l|l|l|} \hline \TYPE & \TYPENAME & \multicolumn{3}{c|}{Operations Supporting \TYPE}\\ \hline + unsigned char & uchar & AND, OR, XOR & & \\ \hline short & short & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline + unsigned short & ushort & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline int & int & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline + unsigned int & uint & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline long & long & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline + unsigned long & ulong & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline long long & longlong & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline + unsigned long long & ulonglong & AND, OR, XOR & MAX, MIN & SUM, PROD \\ \hline 
float & float & & MAX, MIN & SUM, PROD \\ \hline double & double & & MAX, MIN & SUM, PROD \\ \hline long double & longdouble & & MAX, MIN & SUM, PROD \\ \hline From 593374a4c46d6241e9752334d8ec9c49a011c4dd Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 14 Mar 2019 15:48:28 -0500 Subject: [PATCH 141/319] Change from to_all to reduce and move team argument to front --- content/shmem_reductions.tex | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 01fc82ed8..6007df375 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -40,7 +40,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_and\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the AND operation as specified by Table \ref{reducetypes}. } @@ -50,7 +50,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_and\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -73,7 +73,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_or\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the OR operation as specified by Table \ref{reducetypes}. 
} @@ -83,7 +83,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_or\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -106,7 +106,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the XOR operation as specified by Table \ref{reducetypes}. } @@ -116,7 +116,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -139,7 +139,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_max\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{C11synopsis} where \TYPE{} is one of the integer or real types supported for the MAX operation as specified by Table \ref{reducetypes}.
} @@ -149,7 +149,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_max\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -174,7 +174,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_min\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{C11synopsis} where \TYPE{} is one of the integer or real types supported for the MIN operation as specified by Table \ref{reducetypes}. } @@ -184,7 +184,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_min\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -209,7 +209,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_sum\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation as specified by Table \ref{reducetypes}. 
} @@ -219,7 +219,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -246,7 +246,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_prod\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the PROD operation as specified by Table \ref{reducetypes}. } @@ -256,7 +256,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_\FuncParam{TYPENAME}\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_prod\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -281,6 +281,10 @@ \begin{apiarguments} +\newtext{% +\apiargument{IN}{team}{The team over which to perform the operation.}% +} + \apiargument{OUT}{dest}{A symmetric array, of length \VAR{nreduce} elements, to receive the result of the reduction routines. The data type of \dest{} varies with the version of the reduction routine being called. When calling from @@ -292,9 +296,6 @@ arrays. \VAR{nreduce} must be of type integer. 
When using \Fortran, it must be a default integer value.} -\newtext{% -\apiargument{IN}{team}{The team over which to perform the operation.}% -} \begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of From dddb78614fc42c01541fa0d8ae0681c26a0fa801 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Thu, 14 Mar 2019 16:19:43 -0500 Subject: [PATCH 142/319] Broadcast routine add typed, mem versions and change team param order --- content/shmem_broadcast.tex | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 543fa29fd..581488a9e 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -8,9 +8,9 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); -int @\FuncDecl{shmem\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +int @\FuncDecl{shmem\_broadcast}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems, int PE_root); \end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. } %% C/C++ @@ -18,8 +18,12 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); -int @\FuncDecl{shmem\_team\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_broadcast}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems, int PE_root); +\end{CsynopsisCol} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}.
+ +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_broadcastmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems, int PE_root); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -40,23 +44,22 @@ \begin{apiarguments} +\newtext{% +\apiargument{IN}{team}{The team over which to perform the operation.}% +} + \apiargument{OUT}{dest}{A symmetric data object. \newtext{See the table below in this description for allowable types.}} \apiargument{IN}{source}{A symmetric data object that can be of any data type that is permissible for the \dest{} argument.} -\apiargument{IN}{nelems}{The number of elements in \source. For - \FUNC{shmem\_broadcast32} and \FUNC{shmem\_broadcast4}, this is the number of - 32-bit halfwords. nelems must be of type \VAR{size\_t} in \Cstd. When +\apiargument{IN}{nelems}{The number of elements in \source. + nelems must be of type \VAR{size\_t} in \Cstd. When using \Fortran, it must be a default integer value.} \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to the \newtext{team or} active set, from which the data is copied. \VAR{PE\_root} must be of type \CTYPE{int}. When using \Fortran, it must be a default integer value.} -\newtext{% -\apiargument{IN}{team}{The team over which to perform the operation.}% -} - \begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, @@ -143,6 +146,7 @@ constraints, which are as follows: }{Routine}{Data type of \VAR{dest} and \VAR{source}} +\apitablerow{shmem\_broadcastmem}{\Cstd: Any data type. nelems is scaled in bytes.} \apitablerow{shmem\_broadcast8, shmem\_broadcast64}{Any noncharacter type that has an element size of \CONST{64} bits. 
No \Fortran derived types \newtext{nor} \oldtext{or} \CorCpp{} structures are allowed.} From 4ffb767431ac9d69f37e9b8dc31b9c0d416eb131 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 15 Mar 2019 09:51:48 -0500 Subject: [PATCH 143/319] Add types version of alltoall and change team param order --- content/shmem_alltoall.tex | 42 ++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index c3c3a500b..6448dbe27 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -7,17 +7,21 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_alltoall32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_alltoall64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_alltoall}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. } \begin{Csynopsis} \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_alltoall32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_team\_alltoall64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_alltoall}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +\end{CsynopsisCol} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
+ +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_alltoallmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); \end{CsynopsisCol} } @@ -37,6 +41,10 @@ \begin{apiarguments} +\newtext{% +\apiargument{IN}{team}{A valid \openshmem team handle to a team.} +} + \apiargument{OUT}{dest}{A symmetric data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the active set.} @@ -47,10 +55,6 @@ \VAR{nelems} must be of type size\_t for \CorCpp. When using \Fortran, it must be a default integer value.} -\newtext{% -\apiargument{IN}{team}{A valid \openshmem team handle to a team.} -} - \begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, @@ -73,16 +77,24 @@ \end{apiarguments} \apidescription{ +{\color{Green} The \FUNC{shmem\_alltoall} routines are collective routines. Each \ac{PE} - \oldtext{in the active set} \newtext{participating in the operation} - exchanges \VAR{nelems} data elements of size - 32 bits (for \FUNC{shmem\_alltoall32}) or 64 bits (for \FUNC{shmem\_alltoall64}) - with all other \acp{PE} \oldtext{in the set} - \newtext{participating in the operation}. The data being sent and received are + participating in the operation exchanges \VAR{nelems} data elements + with all other \acp{PE} participating in the operation. + The size of a data element is: + \begin{itemize} + \item 32 bits for \FUNC{shmem\_alltoall32} + \item 64 bits for \FUNC{shmem\_alltoall64} + \item 8 bits for \FUNC{shmem\_alltoallmem} + \item \FUNC{sizeof}(\TYPE{}) for alltoall routines taking typed \VAR{source} and \VAR{dest} + \end{itemize} +} + + The data being sent and received are stored in a contiguous symmetric data object. 
The total size of each \acp{PE} \VAR{source} object and \VAR{dest} object is \VAR{nelems} times the size of - an element (32 bits or 64 bits) times \oldtext{\VAR{PE\_size}} - \newtext{\VAR{N}, where \VAR{N} equals the number of \acp{PE} participating + an element \oldtext{(32 bits or 64 bits) times \VAR{PE\_size}} + \newtext{times \VAR{N}, where \VAR{N} equals the number of \acp{PE} participating in the operation}. The \VAR{source} object contains oldtext{\VAR{PE\_size}} \VAR{N} blocks of data (where the size of each block is defined by \VAR{nelems}) and each block of data From d37c5f685c3e51f5b212d94085791630a7a9d118 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 15 Mar 2019 09:52:16 -0500 Subject: [PATCH 144/319] Update alltoalls to match alltoall, fix missing stride params --- content/shmem_alltoalls.tex | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index 779103ea8..439a8da3d 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -7,17 +7,21 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_alltoall32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_alltoall64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_alltoalls}@(shmem_team_t team, TYPE *dest, const TYPE *source, ,ptrdiff_t dst, ptrdiff_t sst, size_t nelems); \end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
} \begin{Csynopsis} \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_alltoall32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_team\_alltoall64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_alltoalls}@(shmem_team_t team, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +\end{CsynopsisCol} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. + +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_team\_alltoallsmem}@(shmem_team_t team, void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); \end{CsynopsisCol} } @@ -38,6 +42,10 @@ \begin{apiarguments} +\newtext{% +\apiargument{IN}{team}{A valid \openshmem team handle.} +} + \apiargument{OUT}{dest}{A symmetric data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the active set.} @@ -55,10 +63,6 @@ of type \CTYPE{ptrdiff\_t}. When using \Fortran, it must be a default integer value.} -\newtext{% -\apiargument{IN}{team}{A valid \openshmem team handle.} -} - \begin{DeprecateBlock} \apiargument{IN}{nelems}{The number of elements to exchange for each \ac{PE}. \VAR{nelems} must be of type size\_t for \CorCpp. When using @@ -91,8 +95,8 @@ access in \FUNC{shmem\_alltoall} is always with a stride of \CONST{1}.} Each \ac{PE} \oldtext{in the active set} \newtext{participating in the operation} - exchanges \VAR{nelems} strided data elements of size - 32 bits (for \FUNC{shmem\_alltoalls32}) or 64 bits (for \FUNC{shmem\_alltoalls64}) + exchanges \VAR{nelems} strided data elements \oldtext{of size + 32 bits (for \FUNC{shmem\_alltoalls32}) or 64 bits (for \FUNC{shmem\_alltoalls64})} with all other \acp{PE} \oldtext{in the set} \newtext{participating in the operation}. 
Both strides, \VAR{dst} and \VAR{sst}, must be greater than or equal to \CONST{1}. @@ -113,6 +117,7 @@ See the description of \FUNC{shmem\_alltoall} in section \ref{subsec:shmem_alltoall} for: \begin{itemize} + \item Data element sizes for the different sized and typed \FUNC{shmem\_alltoalls} variants. \item Rules for \ac{PE} participation in the collective routine. \item The pre- and post-conditions for symmetric objects. \item Typing constraints for \dest{} and \source{} data objects. From a5d83f6d576325a343ab6bde75f062536b345f9e Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 15 Mar 2019 10:12:56 -0500 Subject: [PATCH 145/319] collect,fcollect add typed and mem version and fix team arg order --- content/shmem_collect.tex | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index beb881135..8eab24585 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -8,22 +8,26 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_collect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_collect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_fcollect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_fcollect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_collect}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +int @\FuncDecl{shmem\_fcollect}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
} \begin{Csynopsis} \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_collect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_team\_collect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_team\_fcollect32}@(void *dest, const void *source, size_t nelems, shmem_team_t team); -int @\FuncDecl{shmem\_team\_fcollect64}@(void *dest, const void *source, size_t nelems, shmem_team_t team); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_collect}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_fcollect}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{CsynopsisCol} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. + +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_team\_collectmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); +int @\FuncDecl{shmem\_team\_fcollectmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); +\end{CsynopsisCol} + } \begin{DeprecateBlock} \begin{CsynopsisCol} @@ -50,6 +54,10 @@ \begin{apiarguments} +\newtext{% +\apiargument{IN}{team}{A valid \openshmem team handle.} +} + \apiargument{OUT}{dest}{A symmetric array large enough to accept the concatenation of the \source{} arrays on all participating \acp{PE}. \newtext{See table below in this description for allowable data types.}} @@ -59,10 +67,7 @@ must be of type \VAR{size\_t} for \Cstd. When using \Fortran, it must be a default integer value.} -\newtext{% -\apiargument{IN}{team}{A valid \openshmem team handle.} -} - +\begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. \VAR{PE\_start} must be of type integer. 
When using \Fortran, it must be a default integer value.} @@ -79,6 +84,7 @@ Every element of this array must be initialized with the value \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set enter \FUNC{shmem\_collect} or \FUNC{shmem\_fcollect}.} +\end{DeprecateBlock} \end{apiarguments} @@ -86,7 +92,7 @@ {\color{Green} \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} - \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the + data items from the \source{} array into the \dest{} array, over an \openshmem team or active set in processor number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: @@ -138,6 +144,7 @@ The \dest{} and \source{} data objects must conform to certain typing constraints, which are as follows: }{Routine}{Data type of \VAR{dest} and \VAR{source}} +\apitablerow{\FUNC{shmem\_collectmem}, \FUNC{shmem\_fcollectmem}}{\Cstd: Any data type. nelems is scaled in bytes.}% \apitablerow{\FUNC{shmem\_collect8}, \FUNC{shmem\_collect64}, \FUNC{shmem\_fcollect8}, \FUNC{shmem\_fcollect64}}% {Any noncharacter type that has an element size of \CONST{64} bits. No \Fortran derived types nor \CorCpp{} structures are allowed.} From cff090a52665a411e917ba0e7ac2bc948b5c8a89 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 15 Mar 2019 10:36:45 -0500 Subject: [PATCH 146/319] Update collective intro to match collective routine updates --- content/collective_intro.tex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index de636c4c8..a8a0c4c0a 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -11,7 +11,7 @@ These routines will be the standard for \openshmem moving forward. 
\item Collective routines that operate on active sets use a set of parameters to determine which \acp{PE} will participate and what resources are used to perform operations. These routines -are the legacy API for collectives which will be deprecated and phased out of +are the legacy API for collectives which are deprecated and will be phased out of implementations moving forward. \end{enumerate} @@ -45,12 +45,12 @@ \begin{itemize} \item \FUNC{shmem\_team\_sync} -\item \FUNC{shmem\_team\_broadcast\{32, 64\}} -\item \FUNC{shmem\_team\_collect\{32, 64\}} -\item \FUNC{shmem\_team\_fcollect\{32, 64\}} -\item Reductions for the following operations: AND, MAX, MIN, SUM, PROD, OR, XOR -\item \FUNC{shmem\_team\_alltoall\{32, 64\}} -\item \FUNC{shmem\_team\_alltoalls\{32, 64\}} +\item \FUNC{shmem\_\{TYPE\_\}broadcast\{mem\}} +\item \FUNC{shmem\_\{TYPE\_\}collect\{mem\}} +\item \FUNC{shmem\_\{TYPE\_\}fcollect\{mem\}} +\item Reduction routines for the following operations: AND, MAX, MIN, SUM, PROD, OR, XOR +\item \FUNC{shmem\_\{TYPE\_\}alltoall\{mem\}} +\item \FUNC{shmem\_\{TYPE\_\}alltoalls\{mem\}} \end{itemize} In addition, all team creation functions are collective operations. 
In addition to the ordering @@ -108,7 +108,7 @@ \item \FUNC{shmem\_broadcast\{32, 64\}} \item \FUNC{shmem\_collect\{32, 64\}} \item \FUNC{shmem\_fcollect\{32, 64\}} -\item Reductions for the following operations: AND, MAX, MIN, SUM, PROD, OR, XOR +\item Reduction routines for the following operations: AND, MAX, MIN, SUM, PROD, OR, XOR \item \FUNC{shmem\_alltoall\{32, 64\}} \item \FUNC{shmem\_alltoalls\{32, 64\}} \end{itemize} From fb0cfa912100b9a2793ff628fba2aea7f9cacce0 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Fri, 15 Mar 2019 12:42:47 -0500 Subject: [PATCH 147/319] Add deprecation rationale for active set routine removal --- content/backmatter.tex | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 647278eac..ec996bc57 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -358,6 +358,14 @@ \section{Overview}\label{subsec:dep_overview} \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_add}} & 1.4 & Current & \hyperref[subsec:shmem_atomic_add]{\FUNC{shmem\_atomic\_add}} \\ \hline Entire \Fortran API & 1.4 & Current & (none) \\ \hline + All active-set-based collective routines & 1.5 & Current & Teams-based collective routines \\ \hline + \CorCpp: \FuncRef{shmem\_barrier} & 1.5 & Current & + \FUNC{shmem\_quiet}; \FUNC{shmem\_team\_sync} \\ \hline + \CorCpp: \FuncRef{shmem\_barrier\_all} & 1.5 & Current & + \FUNC{shmem\_quiet}; \FUNC{shmem\_team\_sync}(\LibConstRef{SHMEM\_TEAM\_WORLD}) \\ \hline + \CorCpp: \FuncRef{shmem\_sync\_all} & 1.5 & Current & + \FUNC{shmem\_team\_sync}(\LibConstRef{SHMEM\_TEAM\_WORLD}) \\ \hline + \end{longtable} \end{center} @@ -499,8 +507,35 @@ \subsection{\textit{Fortran} API}\label{subsec:deprecate-fortran} %% WARNING: Is \footnote{Formally, \Fortran[2003] is known as ISO/IEC~1539-1:2004(E).}. 
- - +\subsection{Active-set-based collective routines} +With the addition of \openshmem teams, the previous methods for performing collective +operations have been superseded by a more readable, flexible method for +organizing and communicating between groups of \acp{PE}. All collective routines +which previously indicated subgroups of \acp{PE} with a list of +parameters to describe the subgroup composition should be phased +out in favor of using collective operations with a team parameter. + +\subsection{\CorCpp: \FUNC{shmem\_barrier}} +Each \openshmem team might +be associated with some number of communication contexts. The \FUNC{shmem\_barrier} +functions imply that the default context is quiesced after synchronizing +some set of \acp{PE}. Since teams may have some number of contexts associated +with the team, it becomes less clear which context would be the ``default'' context +for that particular team. Rather than continue to support \FUNC{shmem\_barrier} +for active-sets or teams, programs should use a call to \FUNC{shmem\_quiet} +followed by a call to \FUNC{shmem\_sync} in order to explicitly +indicate which context to quiesce. + +\subsection{\CorCpp: \FUNC{shmem\_barrier\_all}, \FUNC{shmem\_sync\_all}} +With the addition of \openshmem teams, the notion of all \acp{PE} has +been encapsulated as \LibConstRef{SHMEM\_TEAM\_WORLD}. The previous +method of specifying active sets to \FUNC{shmem\_barrier} and \FUNC{shmem\_sync} +was verbose. So, shorthand versions of these functions were helpful both in +readability and ability to improve performance by not requiring tests of +active set parameters to determine that the routine involved all \acp{PE}. +With teams, it becomes readable in a program to simply call a barrier or sync +on \LibConstRef{SHMEM\_TEAM\_WORLD}. Implementations need only test one constant +to determine that the operation involves all \acp{PE}. 
\chapter{Changes to this Document}\label{sec:changelog} From 0e4c68c7cb04c4c6c83d5314c340a18b33c34159 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 18 Mar 2019 13:03:11 -0500 Subject: [PATCH 148/319] Clarify 2d split text and add example --- content/shmem_team_split_2d.tex | 51 ++++++++---- example_code/shmem_team_split_2D.c | 121 +++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 15 deletions(-) create mode 100644 example_code/shmem_team_split_2D.c diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index 246a24a7b..86957d798 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -47,15 +47,27 @@ new teams by splitting an existing parent team into up to two subsets based on a 2D Cartesian space. The user provides the size of the \VAR{x} dimension, which is then used to derive the size of the \VAR{y} dimension based on the size of the parent team. -The size of the \VAR{y} dimension will be equal to $ceiling ( N \div xrange )$, where +The size of the \VAR{y} dimension will be equal to $\lceil N \div xrange \rceil$, where \VAR{N} is the size of the parent team. In other words, $xrange \times yrange \geq N$, so that every \ac{PE} in the parent team has a unique \VAR{(x,y)} location the 2D Cartesian space. -After the split operation, each of the new teams will contain all \acp{PE} that -have the same coordinate along the \VAR{x}-axis and \VAR{y}-axis, respectively, as the calling -\ac{PE}. The \acp{PE} are numbered in the new teams based on the position of the -\ac{PE} along the given axis. +The mapping of \ac{PE} number to coordinates is $(x, y) = ( pe \bmod xrange, \lfloor pe \div xrange \rfloor )$, +where $pe$ is the \ac{PE} number in the parent team. So, if $xrange = 3$, +then the first 3 \acp{PE} in the parent team will form the first +\VAR{xteam}, the second three \acp{PE} in the parent team form the second \VAR{xteam}, +and so on. 
+ +Thus, after the split operation, each of the new \VAR{xteam}s will contain all \acp{PE} that +have the same coordinate along the \VAR{y}-axis as the calling \ac{PE}. Each of the +new \VAR{yteam}s will contain all \acp{PE} with the same coordinate along the +\VAR{x}-axis as the calling \ac{PE}. + +The \acp{PE} are numbered in the new teams based on the coordinate of the +\ac{PE} along the given axis. So, another way to think of the result of the split +operation is that the value returned by \FUNC{shmem\_team\_my\_pe}(\VAR{xteam}) is the +x-coordinate and the value returned by \FUNC{shmem\_team\_my\_pe}(\VAR{yteam}) +is the y-coordinate of the calling \ac{PE}. Any valid \openshmem team can be used as the parent team. This routine must be called by all \acp{PE} in the parent team. The value of \VAR{xrange} must be @@ -96,28 +108,29 @@ \apinotes{ Since the split may result in a 2D space with more points than there are members of the parent team, there may be a final, incomplete row of the 2D mapping of the parent -team. This means that the resultant \VAR{x}-axis teams may vary in size by up to 1 \ac{PE}, -and that there may be one resultant \VAR{y}-axis team of smaller size than all of the other -\VAR{y}-axis teams. +team. This means that the resultant \VAR{yteam}s may vary in size by up to 1 \ac{PE}, +and that there may be one resultant \VAR{xteam} of smaller size than all of the other +\VAR{xteam}s. The following grid shows the 12 teams that would result from splitting a parent team of size 10 with \VAR{xrange} of 3. The numbers in the grid cells are the \ac{PE} numbers -in the parent team. The rows are the \VAR{y}-axis teams. The columns are the \VAR{x}-axis teams. +in the parent team. The rows are the \VAR{xteam}s. The columns are the \VAR{yteam}s. 
\begin{center} \begin{tabular}{|l|l|l|l|} \hline - & x=0 & x=1 & x=2 \\ \hline - y=0 & 0 & 1 & 2 \\ \hline - y=1 & 3 & 4 & 5 \\ \hline - y=2 & 6 & 7 & 8 \\ \hline - y=3 & 9 \\ + & yteam & yteam & yteam \\ + & x=0 & x=1 & x=2 \\ \hline + xteam, y=0 & 0 & 1 & 2 \\ \hline + xteam, y=1 & 3 & 4 & 5 \\ \hline + xteam, y=2 & 6 & 7 & 8 \\ \hline + xteam, y=3 & 9 \\ \cline{0-1} \end{tabular} \end{center} It would be legal, for example, if \acp{PE} 0, 3, 6, 9 specified a different value -for \VAR{xaxis\_config} than all of the other \acp{PE}, as long as the configuration parameters match +for \VAR{yaxis\_config} than all of the other \acp{PE}, as long as the configuration parameters match for all \acp{PE} in each of the new teams. See the description of team handles and predefined teams at the top of section @@ -126,6 +139,14 @@ \begin{apiexamples} + \apicexample + {The following example demonstrates the use of 2D Cartesian split in a + \Cstd[11] program. This example shows how multiple 2D splits can be used + to generate a 3D Cartesian split. This method can be extrapolated to + generate splits of any number of dimensions.} + {./example_code/shmem_team_split_2D.c} + {} + \end{apiexamples} \end{apidefinition} diff --git a/example_code/shmem_team_split_2D.c b/example_code/shmem_team_split_2D.c new file mode 100644 index 000000000..678fb70e9 --- /dev/null +++ b/example_code/shmem_team_split_2D.c @@ -0,0 +1,121 @@ +#include +#include + +int main(void) +{ + int xdim = 3; + int ydim = 4; + + shmem_init(); + int pe = shmem_my_pe(); + int npes = shmem_n_pes(); + + if (npes < (xdim*ydim)) { + printf ("Not enough PEs to create 4x3xN layout\n"); + exit(1); + } + + int zdim = (npes / (xdim*ydim)) + ( ((npes % (xdim*ydim)) > 0) ? 
1 : 0 ); + shmem_team_t xteam, yzteam, yteam, zteam; + + shmem_team_split_2d(SHMEM_TEAM_WORLD, xdim, NULL, 0, &xteam, NULL, 0, &yzteam); + // No synchronization is needed between these split operations + // yzteam is immediately ready to be used in collectives + shmem_team_split_2d(yzteam, ydim, NULL, 0, &yteam, NULL, 0, &zteam); + + // We don't need the yzteam anymore + shmem_team_destroy(yzteam); + + int my_x = shmem_team_my_pe(xteam); + int my_y = shmem_team_my_pe(yteam); + int my_z = shmem_team_my_pe(zteam); + + for (int zdx = 0; zdx < zdim; zdx++) + for (int ydx = 0; ydx < ydim; ydx++) + for (int xdx = 0; xdx < xdim; xdx++) { + if ((my_x == xdx) && (my_y == ydx) && (my_z == zdx)) { + printf ("(%d, %d, %d) is me = %d\n", my_x, my_y, my_z, pe); + } + shmem_team_sync(SHMEM_TEAM_WORLD); + } + + shmem_finalize(); +} + + +/* +/* Example split of SHMEM_TEAM_WORLD, size 16 into 3D +/* xdim = 3, ydim = 4 -> final dimensions are 3x4x2 +/* +/* First split of SHMEM_TEAM_WORLD, xdim=3 +/* results in 6 xteams and 3 yzteam +/********************************************** +/* x=0 | x=1 | x=2 | +/* ------------------- +/* yz=0 | 0 | 1 | 2 | <-- xteam +/* yz=1 | 3 | 4 | 5 | <-- xteam +/* yz=2 | 6 | 7 | 8 | <-- xteam +/* yz=3 | 9 | 10 | 11 | <-- xteam +/* yz=4 | 12 | 13 | 14 | <-- xteam +/* yz=5 | 15 | | <-- xteam +/* ^ ^ ^ +/* { yzteams are columns } +/********************************************** +/* +/* Second split of yzteam for x=0, ydim=4 +/* results in 2 yteams and 4 zteams +/********************************************** +/* y=0 | y=1 | y=2 | y=3 | +/* ------------------------- +/* z=0 | 0 | 3 | 6 | 9 | <-- yteam +/* z=1 | 12 | 15 | | <-- yteam +/* ^ ^ ^ ^ +/* { zteams are columns } +/********************************************** +/* +/* Second split of yzteam for x=1, ydim=4 +/* results in 2 yteams and 4 zteams +/********************************************** +/* y=0 | y=1 | y=2 | y=3 | +/* ------------------------- +/* z=0 | 1 | 4 | 7 | 10 | <-- yteam +/* z=1 | 13 | 
| | <-- yteam +/* ^ ^ ^ ^ +/* { zteams are columns } +/********************************************** +/* +/* Second split of yzteam for x=2, ydim=4 +/* results in 2 yteams and 4 zteams +/********************************************** +/* y=0 | y=1 | y=2 | y=3 | +/* ------------------------- +/* z=0 | 2 | 5 | 8 | 11 | <-- yteam +/* z=1 | 14 | | | <-- yteam +/* ^ ^ ^ ^ +/* { zteams are columns } +/********************************************** +/* +/* Final number of teams for each dimension: +/* 6 xteams, these are teams where (z,y) is fixed and x varies +/* 6 yteams, these are teams where (x,z) is fixed and y varies +/* 12 zteams, these are teams where (x,y) is fixed and z varies +/* +/* Expected output: +/* (0, 0, 0) is me = 0 +/* (1, 0, 0) is me = 1 +/* (2, 0, 0) is me = 2 +/* (0, 1, 0) is me = 3 +/* (1, 1, 0) is me = 4 +/* (2, 1, 0) is me = 5 +/* (0, 2, 0) is me = 6 +/* (1, 2, 0) is me = 7 +/* (2, 2, 0) is me = 8 +/* (0, 3, 0) is me = 9 +/* (1, 3, 0) is me = 10 +/* (2, 3, 0) is me = 11 +/* (0, 0, 1) is me = 12 +/* (1, 0, 1) is me = 13 +/* (2, 0, 1) is me = 14 +/* (0, 1, 1) is me = 15 +*/ + From c831fadbb31d73eeb850d59d4a8dac8164f4cfd6 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 18 Mar 2019 14:20:16 -0500 Subject: [PATCH 149/319] Added example code for team create/destroy/get context --- content/shmem_ctx_get_team.tex | 11 ++++ content/shmem_team_create_ctx.tex | 4 ++ example_code/shmem_team_context.c | 105 ++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 example_code/shmem_team_context.c diff --git a/content/shmem_ctx_get_team.tex b/content/shmem_ctx_get_team.tex index 2bc8b9b10..81f70cbdc 100644 --- a/content/shmem_ctx_get_team.tex +++ b/content/shmem_ctx_get_team.tex @@ -38,4 +38,15 @@ None. } +\begin{apiexamples} + + \apicexample + {The following example demonstrates the use of contexts for multiple teams in a + \Cstd[11] program. 
This example shows contexts being used to communicate within + a team using team \ac{PE} numbers, and across teams using translated \ac{PE} numbers.} + {./example_code/shmem_team_context.c} + {} + +\end{apiexamples} + \end{apidefinition} diff --git a/content/shmem_team_create_ctx.tex b/content/shmem_team_create_ctx.tex index 3bfa0c706..c5ed2a5e1 100644 --- a/content/shmem_team_create_ctx.tex +++ b/content/shmem_team_create_ctx.tex @@ -56,4 +56,8 @@ None. } +\begin{apiexamples} + See example in Section \ref{subsec:shmem_ctx_get_team} +\end{apiexamples} + \end{apidefinition} diff --git a/example_code/shmem_team_context.c b/example_code/shmem_team_context.c new file mode 100644 index 000000000..e93e47cbe --- /dev/null +++ b/example_code/shmem_team_context.c @@ -0,0 +1,105 @@ +#include +#include + +int isum, ival; + +int my_ctx_translate_pe(shmem_ctx_t src_ctx, int src_pe, shmem_ctx_t dest_ctx) +{ + shmem_team_t src_team, dest_team; + shmem_ctx_get_team(src_ctx, &src_team); + shmem_ctx_get_team(dest_ctx, &dest_team); + return shmem_team_translate(src_team, src_pe, dest_pe); +} + +shmem_ctx_t my_team_create_ctx(shmem_team_t team) { + if (team == SHMEM_TEAM_NULL) { + return SHMEM_CTX_INVALID; + } + + shmem_ctx_t ctx; + if (shmem_team_create_ctx(team, 0, &ctx) != 0) { + printf ("Failed to create context PE team!\n"); + shmem_global_exit(-1); + } + return ctx; +} + +void my_send_to_neighbor(shmem_ctx_t ctx, int *val) +{ + if (ctx == SHMEM_CTX_INVALID) + return; + + shmem_team_t team; + shmem_ctx_get_team(ctx, &team); + int pe = shmem_team_my_pe(team); + int npes = shmem_team_n_pes(team); + int rpe = (pe + 1) % npes; + + // put my pe number in the buffer on my right hand neighbor + shmem_ctx_int_put(ctx, val, &pe, 1, rpe); +} + + + +int main() +{ + shmem_init(); + + int npes = shmem_n_pes(); + isum = 0; + + shmem_team_t team_2s, team_3s; + shmem_ctx_t ctx_2s, ctx_3s; + shmem_team_config_t conf; + conf.num_contexts = 1; + long cmask = SHMEM_TEAM_NUM_CONTEXTS; + + // Create 
team with PEs numbered 0, 2, 4, ... + shmem_team_spit_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, &conf, cmask, &team_2s); + // Sync between splits from same parent team into teams with overlapping membership + shmem_team_sync(SHMEM_TEAM_WORLD); + // Create team with PEs numbered 0, 3, 6, ... + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 3, npes / 3, &conf, cmask, &team_3s); + + ctx_2s = my_team_create_ctx(team_2s); + ctx_3s = my_team_create_ctx(team_3s); + + // Send some values using the two team contexts contexts + my_send_to_neighbor(ctx_2s, &ival2); + my_send_to_neighbor(ctx_3s, &ival3); + + // Quiet all contexts and synchronize all PEs to complete the data transfers + shmem_ctx_quiet(ctx_2s); + shmem_ctx_quiet(ctx_3s); + shmem_team_sync(SHMEM_TEAM_WORLD); + + // We will add up some results on pe 4 of team_3s using ctx_2s + if ((team_3s != SHMEM_TEAM_NULL) && (team_2s != SHMEM_TEAM_NULL)) { + int _pe4_of_3s_in_2s = my_ctx_translate_pe(ctx_3s, 4, ctx_2s); + + if (_pe4_of_3s_in_2s < 0) { + printf ("There was no pe 4 of the 3s PE team in the 2s PE team\n"); + shmem_global_exit(-1); + } + + // Add up the results on pe 4 of the 3s team, using the 2s team context + shmem_ctx_int_atomic_add(ctx_2s, &isum, ival2 + ival3, _pe4_of_3s_in_2s); + } + + // Quiet the context and synchronize PEs to complete the operation + shmem_ctx_quiet(ctx_2s); + shmem_team_sync(SHMEM_TEAM_WORLD); + + if (shmem_team_my_pe(team_3s) == 4) { + printf ("The total value is %d\n", isum); + } + + // Destroy contexts before teams + shmem_ctx_destroy(ctx_2s); + shmem_team_destroy(team_2s); + + shmem_ctx_destroy(ctx_3s); + shmem_team_destroy(team_3s); + + shmem_finalize(); +} From 0778b18373345419bf60f36c61ff7627895092d7 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 19 Mar 2019 09:03:13 -0500 Subject: [PATCH 150/319] Fix missing paren typo in xor_reduce --- content/shmem_reductions.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_reductions.tex 
b/content/shmem_reductions.tex index 6007df375..c47f0bc72 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -106,7 +106,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce; +int @\FuncDecl{shmem\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the XOR operation as specified by Table \ref{reducetypes}. } From d112820ba543813c6fe3d17befec6d509911a26d Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 19 Mar 2019 09:15:38 -0500 Subject: [PATCH 151/319] Fix typos in broadcast routine prototypes --- content/shmem_broadcast.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 581488a9e..2c46e744f 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -8,7 +8,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_broadcast}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems, int PE_root) +int @\FuncDecl{shmem\_broadcast}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems, int PE_root); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. } @@ -18,7 +18,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -nt @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_broadcast}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems, int PE_root); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_broadcast}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems, int PE_root); \end{CsynopsisCol} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
From 64edf97c251d37145431416e58b07f203c3e3ff7 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 19 Mar 2019 09:18:40 -0500 Subject: [PATCH 152/319] Fix typos in alltoalls routine protoypes --- content/shmem_alltoalls.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index 439a8da3d..35f366e02 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -7,7 +7,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_alltoalls}@(shmem_team_t team, TYPE *dest, const TYPE *source, ,ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +int @\FuncDecl{shmem\_alltoalls}@(shmem_team_t team, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. } @@ -21,7 +21,7 @@ where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_alltoallsmem}@(shmem_team_t team, void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); +int @\FuncDecl{shmem\_alltoallsmem}@(shmem_team_t team, void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); \end{CsynopsisCol} } From 4aae4321ec1130a2f09030c2dcb2a478d4b4d3ae Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 19 Mar 2019 09:22:04 -0500 Subject: [PATCH 153/319] Remove word team from collect,fcollect mem routine name --- content/shmem_collect.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 8eab24585..bd331a9de 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -24,8 +24,8 @@ where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
\begin{CsynopsisCol} -int @\FuncDecl{shmem\_team\_collectmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); -int @\FuncDecl{shmem\_team\_fcollectmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); +int @\FuncDecl{shmem\_collectmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); +int @\FuncDecl{shmem\_fcollectmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); \end{CsynopsisCol} } From 101c33810f27408df2942e98f48cc92644c44e3b Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 19 Mar 2019 09:33:08 -0500 Subject: [PATCH 154/319] Remove program termination from team context example --- example_code/shmem_team_context.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/example_code/shmem_team_context.c b/example_code/shmem_team_context.c index e93e47cbe..dc41ae1ce 100644 --- a/example_code/shmem_team_context.c +++ b/example_code/shmem_team_context.c @@ -5,6 +5,13 @@ int isum, ival; int my_ctx_translate_pe(shmem_ctx_t src_ctx, int src_pe, shmem_ctx_t dest_ctx) { + if (src_ctx == SHMEM_CTX_INVALID) { + return -1; + } + if (dest_ctx == SHMEM_CTX_INVALID) { + return -1; + } + shmem_team_t src_team, dest_team; shmem_ctx_get_team(src_ctx, &src_team); shmem_ctx_get_team(dest_ctx, &dest_team); @@ -18,16 +25,18 @@ shmem_ctx_t my_team_create_ctx(shmem_team_t team) { shmem_ctx_t ctx; if (shmem_team_create_ctx(team, 0, &ctx) != 0) { - printf ("Failed to create context PE team!\n"); - shmem_global_exit(-1); + fprintf (stderr, "Failed to create context for PE team\n"); + return SHMEM_CTX_INVALID; } return ctx; } void my_send_to_neighbor(shmem_ctx_t ctx, int *val) { - if (ctx == SHMEM_CTX_INVALID) + if (ctx == SHMEM_CTX_INVALID) { + fprintf (stderr, "Send to neighbor fail due to invalid context\n"); return; + } shmem_team_t team; shmem_ctx_get_team(ctx, &team); @@ -78,12 +87,12 @@ int main() int _pe4_of_3s_in_2s = my_ctx_translate_pe(ctx_3s, 4, 
ctx_2s); if (_pe4_of_3s_in_2s < 0) { - printf ("There was no pe 4 of the 3s PE team in the 2s PE team\n"); - shmem_global_exit(-1); + fprintf (stderr, "Fail to translate pe 4 from 3s context to 2s context\n"); + } + else { + // Add up the results on pe 4 of the 3s team, using the 2s team context + shmem_ctx_int_atomic_add(ctx_2s, &isum, ival2 + ival3, _pe4_of_3s_in_2s); } - - // Add up the results on pe 4 of the 3s team, using the 2s team context - shmem_ctx_int_atomic_add(ctx_2s, &isum, ival2 + ival3, _pe4_of_3s_in_2s); } // Quiet the context and synchronize PEs to complete the operation @@ -91,7 +100,7 @@ int main() shmem_team_sync(SHMEM_TEAM_WORLD); if (shmem_team_my_pe(team_3s) == 4) { - printf ("The total value is %d\n", isum); + printf ("The total value on PE 4 of the 3s team is %d\n", isum); } // Destroy contexts before teams From fd24c3113af68ce93fc9481eabcde61bbdb7f148 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 19 Mar 2019 10:50:03 -0500 Subject: [PATCH 155/319] Teams intro clarification and restructure --- content/shmem_teams_intro.tex | 98 ++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 31 deletions(-) diff --git a/content/shmem_teams_intro.tex b/content/shmem_teams_intro.tex index b7d449411..5bd40b41f 100644 --- a/content/shmem_teams_intro.tex +++ b/content/shmem_teams_intro.tex @@ -1,27 +1,36 @@ -The \acp{PE} in an \openshmem program can communicate either using +The \acp{PE} in an \openshmem program communicate using either point-to-point routines that specify the \ac{PE} number of the target -\ac{PE} or using collective routines which operate over some predefined +\ac{PE} or collective routines that operate over some predefined set of \acp{PE}. Teams in \openshmem allow programs to group subsets -of \acp{PE} for collective communications and provide a contiguous reindexing -of the \acp{PE} within that subset that can be used in point-to-point communication. +of \acp{PE} for communications. 
Collective communications operate on +teams objects across the \acp{PE} in the team. Point-to-point routines +can make use of team based renumbering of \acp{PE} by utilizing team +based contexts or \ac{PE} number translation. An \openshmem team is a set of \acp{PE} defined by calling a specific team -split routine with a parent team argument and other arguments to further +split routine with a parent team argument and other arguments to specify how the parent team is to be split into one or more new teams. -A team created by a \FUNC{shmem\_team\_split\_*} routine can be used as the parent team -for a subsequent call to a team split routine. A team persists and can -be used for multiple collective routine calls until it is destroyed by -\FUNC{shmem\_team\_destroy}. +Any team created by a \FUNC{shmem\_team\_split\_*} routine can subsequently +be used as the parent team for further calls to team split routines. +A team persists and can be used for team-based routine calls +until it is destroyed by \FUNC{shmem\_team\_destroy}. Every team must have a least one member. Any attempt to create a team over an empty set of \acp{PE} will result in no new team being created. +\subsubsection*{Team Handles and Predefined Teams} + A ``team handle'' is an opaque object with type \CTYPE{shmem\_team\_t} that is used to reference a defined team. Team handles are created by one of the team split routines and destroyed by the team destroy routine. Team handles have local semantics only. That is, team handles should not be stored in shared variables and used across other \acp{PE}. Doing so will result in undefined behavior. +A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, may be used to +indicate that a returned team handle is not valid. This value can be tested +against to check for successful split operations and can be assigned to user +declared team handles as a sentinel value. 
+ By default, \openshmem creates predefined teams that will be available for use once the routine \FUNC{shmem\_init} has been called. See Section~\ref{subsec:library_handles} for a description of all predefined team handles @@ -33,33 +42,60 @@ The \ac{PE} number in the default team is equal to the value of its \ac{PE} number as returned by \FUNC{shmem\_my\_pe}. -A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, may be used to -indicate that a returned team handle is not valid. This value can be tested -against to check for successful split operations and can be assigned to user -declared team handles as a sentinel value. +\subsubsection*{Team Objects and Multithreading Within a \ac{PE}} -Teams that are created by a \FUNC{shmem\_team\_split\_*} routine may be -provided a configuration argument that specifies team creation options. +Team handles are passed as arguments to a variety of \openshmem routines, +including collective routines (see Section~\ref{subsec:coll}) and team +creation routines. While \openshmem routines are thread-safe as +per threading model (see Section~\ref{subsec:thread_support}), \openshmem +teams objects are not themselves thread-safe. It is the responsibility +of the application to ensure that there are no simultaneous collective +routines operating on the same \openshmem team on a given \ac{PE}. + +\subsubsection*{Team Objects and Collective Ordering across \acp{PE}} + +In \openshmem, a team object encapsulates resources used to communicate +between \acp{PE} in collective operations. When calling multiple subsequent +collective operations on a team, the collective operations -- along with any +relevant team based resources -- are matched across the \acp{PE} in the team +based on ordering of collective routine calls. It is the responsibility +of the application to ensure a consistent ordering of collective routine calls +across all \acp{PE} in a team.
+There is no need for explicit synchronization between subsequent calls +to collective routines across the team, except in the special case discussed +below for team creation of overlapping child teams from a common parent team. + +A full discussion of collective semantics follows in Section~\ref{subsec:coll}. + +\subsubsection*{Team Creation} + +Team creation is a collective operation on the parent team object. New teams +result from a \FUNC{shmem\_team\_split\_*} routine, which takes a parent team +and other arguments and produces new teams that are a subset of the parent +team. Teams that are created by a \FUNC{shmem\_team\_split\_*} routine may be +provided a configuration argument that specifies attributes of each new team. This configuration argument is of type \CTYPE{shmem\_team\_config\_t}, which is detailed further in Section~\ref{subsec:shmem_team_config_t}. -Team creation is a collective operation. As such, team creation in a -multithreaded environment follows the same semantics as discussed in section -\ref{subsec:coll}. That is, while \openshmem routines are thread-safe as -per threading model (see section \ref{subsec:thread_support}),\openshmem -teams objects are not themselves thread-safe. For team creation, this means -that the program must ensure that there are no simultaneous split operations -occuring on the same parent team on a given \ac{PE}. +As with any collective routine on a team, the program must ensure that there +are no simultaneous split operations occurring on the same parent team on a +given \ac{PE}, i.e., in separate threads. -Like other collectives, team creation is matched across PEs based +As with any collective routine on a team, team creation is matched across \acp{PE} based on ordering. So, team creation events must occur in the same order on all \acp{PE} -in the resulting child teams.
Additionally, there must not be team creation -operations from the same parent team simultaneously occuring that involve -the same \acp{PE} in any resulting child teams. In practice, this means that when a parent team -is split multiple times, and the resulting child teams have overlapping membership, -the program must call the \FUNC{shmem\_team\_sync} routine on the parent team -between subsequent calls to split routines. - -Upon completion of a team creation operation, the resulting child teams will be +in the parent team. Additionally, there must not be team creation +operations from the same parent team simultaneously occurring that involve +the same \acp{PE} in any resulting child teams. + +\begin{itemize} +\item[] The following rule of practice will avoid any conflicts on team +object resources during team creation: +\item[] \emph{When a parent team is split multiple times, and the resulting child teams +have overlapping membership, the program must call the \FUNC{shmem\_team\_sync} +routine on the parent team between subsequent calls to split routines.} +\end{itemize} + +Upon completion of a team creation operation, any resulting child teams will be immediately usable for any team-based operations, including creating new child teams, without any intervening synchronization. From 80ee5518d4b998eed04ea21cc8d47eae663247f7 Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Tue, 19 Mar 2019 12:25:33 -0400 Subject: [PATCH 156/319] Update collective examples to match teams API Not including any reduction examples for now. Signed-off-by: David M. 
Ozog --- content/shmem_alltoall.tex | 2 +- content/shmem_alltoalls.tex | 2 +- content/shmem_broadcast.tex | 2 +- example_code/shmem_alltoall_example.c | 10 +++------- example_code/shmem_alltoalls_example.c | 10 +++------- example_code/shmem_broadcast_example.c | 6 ++---- example_code/shmem_collect_example.c | 8 +++----- 7 files changed, 14 insertions(+), 26 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index c3c3a500b..b030d52c8 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -180,7 +180,7 @@ \begin{apiexamples} \apicexample - {This example shows a \FUNC{shmem\_alltoall64} on two long elements among all + {This \CorCpp{} example shows a \FUNC{shmem\_int64\_alltoall} on two 64-bit integers among all \acp{PE}.} {./example_code/shmem_alltoall_example.c} {} diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index 779103ea8..162a4e784 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -133,7 +133,7 @@ \begin{apiexamples} \apicexample - {This example shows a \FUNC{shmem\_alltoalls64} on two long elements among + {This \CorCpp{} example shows a \FUNC{shmem\_int64\_alltoalls} on two 64-bit integers among all \acp{PE}.} {./example_code/shmem_alltoalls_example.c} {} diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 543fa29fd..457fd91b9 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -188,7 +188,7 @@ \begin{apiexamples} \apicexample - {In the following example, the call to \FUNC{shmem\_broadcast64} copies \source{} + {In the following \Cstd[11] example, the call to \FUNC{shmem\_broadcast} copies \source{} on \ac{PE} $0$ to \dest{} on \acp{PE} $1\dots npes-1$. 
\CorCpp{} example:} diff --git a/example_code/shmem_alltoall_example.c b/example_code/shmem_alltoall_example.c index 13d11b75e..a5d102545 100644 --- a/example_code/shmem_alltoall_example.c +++ b/example_code/shmem_alltoall_example.c @@ -4,10 +4,6 @@ int main(void) { - static long pSync[SHMEM_ALLTOALL_SYNC_SIZE]; - for (int i = 0; i < SHMEM_ALLTOALL_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; - shmem_init(); int me = shmem_my_pe(); int npes = shmem_n_pes(); @@ -23,11 +19,11 @@ int main(void) dest[(pe * count) + i] = 9999; } } - /* wait for all PEs to update source/dest */ - shmem_barrier_all(); + /* wait for all PEs to initialize source/dest */ + shmem_team_sync(SHMEM_TEAM_WORLD); /* alltoall on all PES */ - shmem_alltoall64(dest, source, count, 0, 0, npes, pSync); + shmem_int64_alltoall(SHMEM_TEAM_WORLD, dest, source, count); /* verify results */ for (int pe = 0; pe < npes; pe++) { diff --git a/example_code/shmem_alltoalls_example.c b/example_code/shmem_alltoalls_example.c index f78ff5687..5c135185a 100644 --- a/example_code/shmem_alltoalls_example.c +++ b/example_code/shmem_alltoalls_example.c @@ -4,10 +4,6 @@ int main(void) { - static long pSync[SHMEM_ALLTOALLS_SYNC_SIZE]; - for (int i = 0; i < SHMEM_ALLTOALLS_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; - shmem_init(); int me = shmem_my_pe(); int npes = shmem_n_pes(); @@ -25,11 +21,11 @@ int main(void) dest[dst * ((pe * count) + i)] = 9999; } } - /* wait for all PEs to update source/dest */ - shmem_barrier_all(); + /* wait for all PEs to initialize source/dest */ + shmem_team_sync(SHMEM_TEAM_WORLD); /* alltoalls on all PES */ - shmem_alltoalls64(dest, source, dst, sst, count, 0, 0, npes, pSync); + shmem_int64_alltoalls(SHMEM_TEAM_WORLD, dest, source, dst, sst, count); /* verify results */ for (int pe = 0; pe < npes; pe++) { diff --git a/example_code/shmem_broadcast_example.c b/example_code/shmem_broadcast_example.c index a829448ea..8c0b84037 100644 --- a/example_code/shmem_broadcast_example.c +++ 
b/example_code/shmem_broadcast_example.c @@ -4,9 +4,6 @@ int main(void) { - static long pSync[SHMEM_BCAST_SYNC_SIZE]; - for (int i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; static long source[4], dest[4]; shmem_init(); @@ -17,7 +14,8 @@ int main(void) for (int i = 0; i < 4; i++) source[i] = i; - shmem_broadcast64(dest, source, 4, 0, 0, 0, npes, pSync); + shmem_broadcast(SHMEM_TEAM_WORLD, dest, source, 4, 0); + printf("%d: %ld, %ld, %ld, %ld\n", me, dest[0], dest[1], dest[2], dest[3]); shmem_finalize(); return 0; diff --git a/example_code/shmem_collect_example.c b/example_code/shmem_collect_example.c index b73733368..9eb569627 100644 --- a/example_code/shmem_collect_example.c +++ b/example_code/shmem_collect_example.c @@ -5,9 +5,6 @@ int main(void) { static long lock = 0; - static long pSync[SHMEM_COLLECT_SYNC_SIZE]; - for (int i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; shmem_init(); int me = shmem_my_pe(); @@ -23,9 +20,10 @@ int main(void) for (int i = 0; i < total_nelem; i++) dest[i] = -9999; - shmem_barrier_all(); /* Wait for all PEs to update source/dest */ + /* Wait for all PEs to initialize source/dest: */ + shmem_team_sync(SHMEM_TEAM_WORLD); - shmem_collect32(dest, source, my_nelem, 0, 0, npes, pSync); + shmem_int_collect(SHMEM_TEAM_WORLD, dest, source, my_nelem); shmem_set_lock(&lock); /* Lock prevents interleaving printfs */ printf("%d: %d", me, dest[0]); From 112cfe35f37ba814dcdd63dfe9892534aeb62673 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 19 Mar 2019 15:02:33 -0500 Subject: [PATCH 157/319] Fix formatting in shmem_collectmem description --- content/shmem_collect.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index bd331a9de..276a097f9 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -144,7 +144,7 @@ The \dest{} and \source{} data objects must conform to certain typing constraints, 
which are as follows: }{Routine}{Data type of \VAR{dest} and \VAR{source}} -\apitablerow{\FUNC{shmem\_collectmem}, \FUNC{shmem\_fcollectmem}}{\Cstd: Any data type. nelems is scaled in bytes.}% +\apitablerow{\FUNC{shmem\_collectmem}, \FUNC{shmem\_fcollectmem}}{\Cstd: Any data type. \VAR{nelems} is scaled in bytes.}% \apitablerow{\FUNC{shmem\_collect8}, \FUNC{shmem\_collect64}, \FUNC{shmem\_fcollect8}, \FUNC{shmem\_fcollect64}}% {Any noncharacter type that has an element size of \CONST{64} bits. No \Fortran derived types nor \CorCpp{} structures are allowed.} From ebe62f2cc782dea5b3200205b3ad8e59493f70fa Mon Sep 17 00:00:00 2001 From: James Dinan Date: Wed, 20 Mar 2019 12:38:16 -0400 Subject: [PATCH 158/319] Update partial updates text in wait/test Signed-off-by: James Dinan --- content/shmem_test.tex | 4 ++++ content/shmem_wait_until.tex | 12 +++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/content/shmem_test.tex b/content/shmem_test.tex index a7dd67f6d..b81f554e2 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -30,6 +30,10 @@ \FUNC{shmem\_test} tests the numeric comparison of the symmetric object pointed to by \VAR{ivar} with the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp}. + + Implementations must ensure that \FUNC{shmem\_test} does not return true before + the update of the memory indicated by \VAR{ivar} is fully complete. + Partial updates to the memory must not cause \FUNC{shmem\_test} to return true. } \apireturnvalues{ diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index e5943b468..7de79e516 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -68,6 +68,11 @@ to \FUNC{shmem\_wait\_until} blocks until the value of \VAR{ivar} at the calling \ac{PE} satisfies the wait condition specified by the comparison operator, \VAR{cmp}, and comparison value, \VAR{cmp\_value}. 
+ + Implementations must ensure that \FUNC{shmem\_wait} and + \FUNC{shmem\_wait\_until} do not return before the update of the memory + indicated by \VAR{ivar} is fully complete. Partial updates to the memory + must not cause \FUNC{shmem\_wait} or \FUNC{shmem\_wait\_until} to return. } @@ -90,13 +95,6 @@ where \VAR{cmp} is \CONST{SHMEM\_CMP\_NE}. } -\apiimpnotes{ - Implementations must ensure that \FUNC{shmem\_wait} and - \FUNC{shmem\_wait\_until} do not return before the update of the memory - indicated by \VAR{ivar} is fully complete. Partial updates to the memory - must not cause \FUNC{shmem\_wait} or \FUNC{shmem\_wait\_until} to return. -} - \begin{apiexamples} From 5eb3d832b6ad3cec5aaa4102fd4c19e7809c279b Mon Sep 17 00:00:00 2001 From: James Dinan Date: Wed, 20 Mar 2019 12:48:13 -0400 Subject: [PATCH 159/319] Update atomicity text to allow wait/test Signed-off-by: James Dinan --- content/memory_model.tex | 3 +++ 1 file changed, 3 insertions(+) diff --git a/content/memory_model.tex b/content/memory_model.tex index 206f2b70c..f2cb3ce78 100644 --- a/content/memory_model.tex +++ b/content/memory_model.tex @@ -60,6 +60,9 @@ \subsection{Atomicity Guarantees}\label{subsec:amo_guarantees} guarantee that concurrent accesses by any of these routines to the same location and using the same datatype (specified in Tables~\ref{stdamotypes} and \ref{extamotypes}) will be exclusive. +Exclusivity is also guaranteed when the target \ac{PE} performs a wait or test +operation on the same location and with the same datatype as one or more atomic +operations. \openshmem atomic operations do not guarantee exclusivity in the following scenarios, all of which result in undefined behavior. 
\begin{enumerate} From f6c51fb3be814e8a0b82d360a0308d0b6b91231a Mon Sep 17 00:00:00 2001 From: James Dinan Date: Wed, 20 Mar 2019 12:52:51 -0400 Subject: [PATCH 160/319] Restrict wait/test to updates via AMOs Signed-off-by: James Dinan --- content/shmem_test.tex | 4 +++- content/shmem_wait_until.tex | 10 +++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/content/shmem_test.tex b/content/shmem_test.tex index b81f554e2..fca8eed10 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -29,7 +29,9 @@ \apidescription{ \FUNC{shmem\_test} tests the numeric comparison of the symmetric object pointed to by \VAR{ivar} with the value \VAR{cmp\_value} according to the - comparison operator \VAR{cmp}. + comparison operator \VAR{cmp}. The \VAR{ivar} object at the + calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located + within the calling \ac{PE} or within another \ac{PE}. Implementations must ensure that \FUNC{shmem\_test} does not return true before the update of the memory indicated by \VAR{ivar} is fully complete. diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index 7de79e516..90daf5a0c 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -53,13 +53,9 @@ \apidescription{ The \FUNC{shmem\_wait} and \FUNC{shmem\_wait\_until} operations block until the value contained in the symmetric data object, \VAR{ivar}, at the - calling \ac{PE} satisfies the wait condition. In an \openshmem program - with single-threaded \acp{PE}, the \VAR{ivar} object at the calling \ac{PE} - may be updated by an \ac{RMA}, \ac{AMO}, or store operation performed by another - \ac{PE}. In an \openshmem program with multithreaded \acp{PE}, the - \VAR{ivar} object at the calling \ac{PE} may be updated by an \ac{RMA}, \ac{AMO}, or - store operation performed by a thread located within the calling \ac{PE} or - within another \ac{PE}. + calling \ac{PE} satisfies the wait condition. 
The \VAR{ivar} object at the + calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located + within the calling \ac{PE} or within another \ac{PE}. These routines can be used to implement point-to-point synchronization between \acp{PE} or between threads within the same \ac{PE}. A call to From 70338e3d6846a2bfc03b65779b80d1b6aef5da3b Mon Sep 17 00:00:00 2001 From: James Dinan Date: Wed, 20 Mar 2019 13:04:11 -0400 Subject: [PATCH 161/319] Clarify that shmem_ptr returns a locally accessible address Signed-off-by: James Dinan --- content/shmem_ptr.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index 98d8c592a..81732013b 100644 --- a/content/shmem_ptr.tex +++ b/content/shmem_ptr.tex @@ -26,6 +26,8 @@ \FUNC{shmem\_ptr} returns an address that may be used to directly reference \dest{} on the specified \ac{PE}. This address can be assigned to a pointer. After that, ordinary loads and stores to this remote address may be performed. + The address returned by \FUNC{shmem\_ptr} is considered to be locally + accessible and is not valid where a remotely accessible address is required. 
The \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish communication, for example when a sequence of reads and writes to a data From bb83a1c4d7ba4e3265ba47e7822299beb77d8a27 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 20 Mar 2019 13:46:58 -0500 Subject: [PATCH 162/319] Add reduction example --- content/shmem_reductions.tex | 48 +++------------------- example_code/shmem_reduce_example.c | 62 +++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 43 deletions(-) create mode 100644 example_code/shmem_reduce_example.c diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index c47f0bc72..f77c975a2 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -459,49 +459,11 @@ \begin{apiexamples} \apifexample - {This \Fortran reduction example statically initializes the \VAR{pSync} array - and finds the logical \OPR{AND} of the integer variable \VAR{FOO} across all - even \acp{PE}.} - {./example_code/shmem_and_example.f90} - {} - -\apifexample - {This \Fortran example statically initializes the \VAR{pSync} array and finds - the \OPR{maximum} value of real variable \VAR{FOO} across all even \acp{PE}.} - {./example_code/shmem_max_example.f90} - {} - -\apifexample - { This \Fortran example statically initializes the \VAR{pSync} array and finds - the \OPR{minimum} value of real variable \VAR{FOO} across all the even - \acp{PE}.} - {./example_code/shmem_min_example.f90} - {} - -\apifexample - {This \Fortran example statically initializes the \VAR{pSync} array and finds - the \OPR{sum} of the real variable \VAR{FOO} across all even \acp{PE}.} - {./example_code/shmem_sum_example.f90} - {} - -\apifexample - {This \Fortran example statically initializes the \VAR{pSync} array and finds - the \OPR{product} of the real variable \VAR{FOO} across all the even \acp{PE}.} - {./example_code/shmem_prod_example.f90} - {} - -\apifexample - {This \Fortran example statically initializes the \VAR{pSync} array 
and finds - the logical \OPR{OR} of the integer variable \VAR{FOO} across all even - \acp{PE}.} - {./example_code/shmem_or_example.f90} - {} - -\apifexample - {This \Fortran example statically initializes the \VAR{pSync} array and - computes the exclusive \OPR{XOR} of variable \VAR{FOO} across all even - \acp{PE}.} - {./example_code/shmem_xor_example.f90} + {This \CorCpp{} reduction example gets integers from an external + source (random generator in this example), tests to see if the \ac{PE} got a valid + value, and outputs the sum of values for which all \acp{PE} got a valid + value.} + {./example_code/shmem_reduce_example.c} {} \end{apiexamples} diff --git a/example_code/shmem_reduce_example.c b/example_code/shmem_reduce_example.c new file mode 100644 index 000000000..3c2c7a0b8 --- /dev/null +++ b/example_code/shmem_reduce_example.c @@ -0,0 +1,62 @@ +#include <shmem.h> +#include <stdio.h> +#include <stdlib.h> + +/* As if we receive some value from external source */ +long recv_a_value(unsigned seed, int npes) { + srand(seed); + return rand() % npes; +} + +/* Validate the value we received */ +unsigned char is_valid(long value, int npes) { + if (value == (npes-1)) + return 0; + return 1; +} + +int main(void) +{ + + shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + size_t num = 32; + + long *values = shmem_malloc(num * sizeof(int)); + long *sums = shmem_malloc(num * sizeof(int)); + + unsigned char *valid_me = shmem_malloc(num * sizeof(int)); + unsigned char *valid_all = shmem_malloc(num * sizeof(int)); + + values[0] = recv_a_value((unsigned)me, npes); + valid_me[0] = is_valid(values[0], npes); + + for (int i=1; i < num; i++) { + values[i] = recv_a_value((unsigned)values[i-1], npes); + valid_me[i] = is_valid(values[i], npes); + } + + /* Wait for all PEs to initialize reductions arrays */ + shmem_sync(SHMEM_TEAM_WORLD); + +#if __STDC_VERSION__ >= 201112 + /* C11 generic interface */ + shmem_and_reduce(SHMEM_TEAM_WORLD, valid_all, valid_me, num); +
shmem_sum_reduce(SHMEM_TEAM_WORLD, sums, values, num); +#else + /* C/C++ interface without generic support */ + shmem_uchar_and_reduce(SHMEM_TEAM_WORLD, valid_all, valid_me, num); + shmem_long_sum_reduce(SHMEM_TEAM_WORLD, sums, values, num); +#endif + + for (int i=0; i < num; i++) { + if (valid_all[i]) { + printf ("[%d] = %ld\n", i, sums[i]); + } + else { + printf ("[%d] = invalid on one or more pe\n", i); + } + } +} + From 247e82dccae5b88ac5f57121428d5e0498a43dee Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 20 Mar 2019 14:35:07 -0500 Subject: [PATCH 163/319] Add green text around new content --- content/backmatter.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index ec996bc57..0cb7dfa67 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -358,11 +358,16 @@ \section{Overview}\label{subsec:dep_overview} \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_add}} & 1.4 & Current & \hyperref[subsec:shmem_atomic_add]{\FUNC{shmem\_atomic\_add}} \\ \hline Entire \Fortran API & 1.4 & Current & (none) \\ \hline + + \color{Green} All active-set-based collective routines & 1.5 & Current & Teams-based collective routines \\ \hline + \color{Green} \CorCpp: \FuncRef{shmem\_barrier} & 1.5 & Current & \FUNC{shmem\_quiet}; \FUNC{shmem\_team\_sync} \\ \hline + \color{Green} \CorCpp: \FuncRef{shmem\_barrier\_all} & 1.5 & Current & \FUNC{shmem\_quiet}; \FUNC{shmem\_team\_sync}(\LibConstRef{SHMEM\_TEAM\_WORLD}) \\ \hline + \color{Green} \CorCpp: \FuncRef{shmem\_sync\_all} & 1.5 & Current & \FUNC{shmem\_team\_sync}(\LibConstRef{SHMEM\_TEAM\_WORLD}) \\ \hline @@ -507,6 +512,7 @@ \subsection{\textit{Fortran} API}\label{subsec:deprecate-fortran} %% WARNING: Is \footnote{Formally, \Fortran[2003] is known as ISO/IEC~1539-1:2004(E).}. 
+{\color{Green} \subsection{Active-set-based collective routines} With the addition of \openshmem teams, the previous methods for performing collective operations has been superseded by a more readable, flexible method for @@ -536,6 +542,7 @@ \subsection{\CorCpp: \FUNC{shmem\_barrier\_all}, \FUNC{shmem\_sync\_all}} With teams, becomes readable in a program to simply call a barrier or sync on \LibConstRef{SHMEM\_TEAM\_WORLD}. Implementations need only test one constant to determine that the operation involves all \acp{PE}. +} \chapter{Changes to this Document}\label{sec:changelog} From 0ef49c2d99000a80a151d34279ebbb1255f09327 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 20 Mar 2019 15:57:45 -0500 Subject: [PATCH 164/319] Fix incorrect parameter name in team_get_config --- content/shmem_team_get_config.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_get_config.tex b/content/shmem_team_get_config.tex index 30827b7fb..2673f01d2 100644 --- a/content/shmem_team_get_config.tex +++ b/content/shmem_team_get_config.tex @@ -5,7 +5,7 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_team\_get\_config}@(shmem_team_t team, shmem_team_config_t *team); +void @\FuncDecl{shmem\_team\_get\_config}@(shmem_team_t team, shmem_team_config_t *config); \end{Csynopsis} \begin{apiarguments} From aeb425725152e3a5387b7e688859534be65bc4d3 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 20 Mar 2019 16:04:35 -0500 Subject: [PATCH 165/319] Remove condition that all PE in a team get back same config --- content/shmem_team_get_config.tex | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/content/shmem_team_get_config.tex b/content/shmem_team_get_config.tex index 2673f01d2..d031ffef0 100644 --- a/content/shmem_team_get_config.tex +++ b/content/shmem_team_get_config.tex @@ -11,7 +11,7 @@ \begin{apiarguments} \apiargument{IN}{team}{A valid \openshmem team handle.} \apiargument{OUT}{config}{ - A pointer 
to the configuration parameters for the new team.} + A pointer to the configuration parameters for the given team.} \end{apiarguments} \apidescription{ @@ -19,10 +19,8 @@ the configuration parameters of the given team, which were assigned according to input configuration parameters when the team was created. -All \acp{PE} in the team will get back the same parameter values for the team options. - If the \VAR{team} argument does not specify a valid team, the behavior is -undefined. +undefined. If \VAR{team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, then \VAR{config} will be set to the null pointer. } \apireturnvalues{ From 5678e0ce41518f6135ba3586c9c3cec7ee444889 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 20 Mar 2019 16:19:14 -0500 Subject: [PATCH 166/319] Fix type in 2d split of xdim should be xrange --- content/shmem_team_split_2d.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index 86957d798..c5e0d39ac 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -52,8 +52,8 @@ $xrange \times yrange \geq N$, so that every \ac{PE} in the parent team has a unique \VAR{(x,y)} location the 2D Cartesian space. -The mapping of \ac{PE} number to coordinates is $(x, y) = ( pe \mod xrange, \lfloor pe \div xdim \rfloor )$, -where $pe$ is the \ac{PE} number in the parent team. So, if $xdim = 3$, +The mapping of \ac{PE} number to coordinates is $(x, y) = ( pe \bmod xrange, \lfloor pe \div xrange \rfloor )$, +where $pe$ is the \ac{PE} number in the parent team. So, if $xrange = 3$, then the first 3 \acp{PE} in the parent team will form the first \VAR{xteam}, the second three \acp{PE} in the parent team form the second \VAR{xteam}, and so on.
From 902745afe2be6e6baf831fabbab8a24064e5b14f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 25 Mar 2019 09:40:08 -0500 Subject: [PATCH 167/319] Change type of nreduce to size_t --- content/shmem_reductions.tex | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index f77c975a2..081750f71 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -40,7 +40,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_and\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_and\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the AND operation as specified by Table \ref{reducetypes}. } @@ -50,7 +50,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_and\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_and\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -73,7 +73,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_or\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_or\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the OR operation as specified by Table \ref{reducetypes}. 
} @@ -83,7 +83,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_or\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_or\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -106,7 +106,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the XOR operation as specified by Table \ref{reducetypes}. } @@ -116,7 +116,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -139,7 +139,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_max\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_max\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer or real types supported for the MAX operation as specified by Table \ref{reducetypes}. 
} @@ -149,7 +149,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_max\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_max\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -174,7 +174,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_min\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_min\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer or real types supported for the MIN operation as specified by Table \ref{reducetypes}. } @@ -184,7 +184,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_min\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_min\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -209,7 +209,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_sum\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_sum\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation as specified by Table \ref{reducetypes}. 
} @@ -219,7 +219,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -246,7 +246,7 @@ %% C11 {\color{Green} \begin{C11synopsis} -int @\FuncDecl{shmem\_prod\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_prod\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the PROD operation as specified by Table \ref{reducetypes}. } @@ -256,7 +256,7 @@ \end{Csynopsis} {\color{Green} \begin{CsynopsisCol} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_prod\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, int nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_prod\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} } \begin{DeprecateBlock} @@ -293,7 +293,9 @@ contains one element for each separate reduction routine. The \source{} argument must have the same data type as \dest.} \apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. \VAR{nreduce} must be of type integer. When using \Fortran, it + arrays. \newtext{In teams based API calls, \VAR{nreduce} must be of type size\_t. + In deprecated active-set based API calls,} + \VAR{nreduce} must be of type int. 
When using \Fortran, it must be a default integer value.} From 2b7de3583a0f2475bed806b6101d2b3ad23107bb Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 25 Mar 2019 09:53:58 -0500 Subject: [PATCH 168/319] Add rationale for deprecating fix size collectives --- content/backmatter.tex | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 0cb7dfa67..b8f70c1e0 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -519,7 +519,13 @@ \subsection{Active-set-based collective routines} organizing and communicating between groups of \acp{PE}. All collective routines which previously indicated subgroups of \acp{PE} with a list of parameters to describe the subgroup composition should be phased -out in favor of using collective operations with a team parameter +out in favor of using collective operations with a team parameter. + +When moving from active set routines to teams based routines, the fixed-size +versions of the routines, e.g. \FUNC{shmem\_broadcast32}, were not +carried forward. Instead, all teams based collective routines use standard +\Cstd types with the option to use generic \textit{C11} functions for more portable +and maintainable implementations. \subsection{\CorCpp: \FUNC{shmem\_barrier}} Each \openshmem team might From c9b21ac2151805d230eb1c17f47321280e8af877 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Mar 2019 11:46:16 -0500 Subject: [PATCH 169/319] Add include and caption for split strided example --- content/shmem_team_split_strided.tex | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 142677e73..7fff6e532 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -92,6 +92,14 @@ \begin{apiexamples} + \apicexample + {The following example demonstrates the use of strided split in a + \Cstd[11] program. 
The program creates a new team of all even number + \acp{PE} from the default team, then retrieves the \ac{PE} number and + team size on all \acp{PE} that are members of the new team.} + {./example_code/shmem_team_split_strided.c} + {} + \end{apiexamples} \end{apidefinition} From e4d94d5a3f46463c5eaa5eee19c001c990aa6c77 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Mar 2019 11:46:35 -0500 Subject: [PATCH 170/319] Add include and caption for team translate example --- content/shmem_team_translate.tex | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate.tex index b9941ae7f..7841629c2 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate.tex @@ -38,4 +38,17 @@ None. } +\begin{apiexamples} + + \apicexample + {The following example demonstrates the use of the team \ac{PE} + number translation routine. The program makes a new team of all + of the even number \acp{PE} in the default team. Then, all \acp{PE} + in the new team acquire their \ac{PE} number in the new team + and translate it to the \ac{PE} number in the default team.} + {./example_code/shmem_team_translate.c} + {} + +\end{apiexamples} + \end{apidefinition} From f15495567977eb0f490786707a7a51de492c1dac Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Tue, 26 Mar 2019 11:47:26 -0500 Subject: [PATCH 171/319] Update authors list --- content/coverpage.tex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/content/coverpage.tex b/content/coverpage.tex index d7692e386..15ca21dbe 100644 --- a/content/coverpage.tex +++ b/content/coverpage.tex @@ -59,6 +59,7 @@ \section*{Current Authors and Collaborators} \item Mike Dubman, Mellanox \item Karl Feind, \ac{HPE} \item Manjunath Gorentla Venkata, \ac{ORNL} +\item Megan Grodowitz, Arm Inc. 
\item Max Grossman, Rice University \item Khaled Hamidouche, \ac{AMD} \item Jeff Hammond, Intel @@ -78,7 +79,7 @@ \section*{Current Authors and Collaborators} \item Naveen Ravichandrasekaran, Cray Inc. \item Michael Raymond, \ac{HPE} \item James Ross, \ac{ARL} -\item Pavel Shamis, ARM Inc. +\item Pavel Shamis, Arm Inc. \item Sameer Shende, \ac{UO} \item Lauren Smith, \ac{DoD} From 4cebc5bece12bf9ff96c4abad889438de4c2c073 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 28 Mar 2019 15:06:16 -0400 Subject: [PATCH 172/319] Updated from March 28 WG discussion Signed-off-by: James Dinan --- content/shmem_ptr.tex | 14 +++++++++----- content/shmem_test.tex | 4 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index 81732013b..889d987c8 100644 --- a/content/shmem_ptr.tex +++ b/content/shmem_ptr.tex @@ -16,7 +16,8 @@ \begin{apiarguments} -\apiargument{IN}{dest}{The symmetric data object to be referenced.} +\apiargument{IN}{dest}{The symmetric address of the remotely accessible data + object to be referenced.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to be accessed. When using \Fortran, it must be a default integer value.} @@ -26,8 +27,10 @@ \FUNC{shmem\_ptr} returns an address that may be used to directly reference \dest{} on the specified \ac{PE}. This address can be assigned to a pointer. After that, ordinary loads and stores to this remote address may be performed. - The address returned by \FUNC{shmem\_ptr} is considered to be locally - accessible and is not valid where a remotely accessible address is required. + The address returned by \FUNC{shmem\_ptr} is a local address to a remotely + accessible data object. Providing this address to an argument of an + \openshmem routine that requires a symmetric address to a remotely + accessible object results in undefined behavior. 
The \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish communication, for example when a sequence of reads and writes to a data @@ -37,8 +40,9 @@ } \apireturnvalues{ - The address of the \dest{} data object is returned when it is accessible - using memory loads and stores. Otherwise, a null pointer is returned. + A local pointer to the remotely accessible \dest{} data object is returned + when it can be accessed using memory loads and stores. Otherwise, a null + pointer is returned. } \apinotes{ diff --git a/content/shmem_test.tex b/content/shmem_test.tex index fca8eed10..d9de7b3d5 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -33,9 +33,9 @@ calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. - Implementations must ensure that \FUNC{shmem\_test} does not return true before + Implementations must ensure that \FUNC{shmem\_test} does not return 1 before the update of the memory indicated by \VAR{ivar} is fully complete. - Partial updates to the memory must not cause \FUNC{shmem\_test} to return true. + Partial updates to the memory must not cause \FUNC{shmem\_test} to return 1. } \apireturnvalues{ From da476d8eb51badf8493f4f1bb52cf1a1f78386c4 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 11 Apr 2019 00:53:46 -0500 Subject: [PATCH 173/319] Add support for different sig update operations --- content/library_constants.tex | 14 ++++++ content/shmem_put_signal.tex | 62 ++++++++++++++----------- example_code/shmem_put_signal_example.c | 4 +- 3 files changed, 52 insertions(+), 28 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index 6ef572170..754081a5d 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -67,6 +67,20 @@ See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. 
\tabularnewline \hline %% +\color{ForestGreen} +\LibConstDecl{SHMEM\_SIGNAL\_SET} & +\color{ForestGreen} +An integer constant expression corresponding to the signal update set operation. +See Section~\ref{subsec:shmem_put_signal} for more detail about its use. +\tabularnewline \hline +%% +\color{ForestGreen} +\LibConstDecl{SHMEM\_SIGNAL\_ADD} & +\color{ForestGreen} +An integer constant expression corresponding to the signal update add operation. +See Section~\ref{subsec:shmem_put_signal} for more detail about its use. +\tabularnewline \hline +%% \LibConstDecl{SHMEM\_SYNC\_VALUE} \begin{DeprecateBlock} \LibConstDecl{\_SHMEM\_SYNC\_VALUE} diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 2956ae879..c6eef2e00 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -1,32 +1,33 @@ +\color{ForestGreen} \apisummary{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently setting a remote flag to signal completion. + and subsequently update a remote flag to signal completion. } \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{CsynopsisCol} \begin{apiarguments} @@ -43,17 +44,23 @@ remotely accessible.} \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the remote \VAR{sig\_addr} signal data object.} + \apiargument{IN}{sig\_op}{Signal operator that represents the type of update + to be performed to the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently setting a remote flag to signal completion. The routines + and subsequently update a remote flag to signal completion. The routines return after the data has been copied out of the \source{} array on the - local \ac{PE}. The delivery of \VAR{signal} flag on the remote \ac{PE} - indicates the delivery of its corresponding \dest{} data words into the - data object on the remote \ac{PE}. + local \ac{PE}. + + The \VAR{sig\_op} signal operator determines the type of update to be + performed on the remote \VAR{sig\_addr} signal data object. 
The completion + of signal update based on the \VAR{sig\_op} signal operator using the + \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its + corresponding \dest{} data words into the data object on the remote \ac{PE}. } \apireturnvalues{ @@ -68,19 +75,21 @@ The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. - The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the - delivery of its corresponding \dest{} data words into the data object on - the remote \ac{PE}. Without a memory-ordering operation, there is no implied - ordering between the delivery of the signal word of a put-with-signal - routine and another data transfer. For example, the delivery of the signal - word in a sequence consisting of a put routine followed by a put-with-signal - routine does not imply delivery of the put routine's data. - - The signal set by the put-with-signal routines is compatible - with all point-to-point synchronization interfaces. The delivery of - \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \VAR{signal} flag to be an atomic operation, with - atomicity guarantees described in Section~\ref{subsec:amo_guarantees}. + The completion of signal update using the \VAR{signal} flag on the remote + \ac{PE} indicates only the delivery of its corresponding \dest{} data words + into the data object on the remote \ac{PE}. Without a memory-ordering + operation, there is no implied ordering between the signal update of a + put-with-signal routine and another data transfer. For example, the + completion of the signal update in a sequence consisting of a put routine + followed by a put-with-signal routine does not imply delivery of the put + routine's data. + + The signal update by the put-with-signal routines is compatible with all + point-to-point synchronization interfaces. 
The delivery of \VAR{signal} flag + based on the \VAR{sig\_op} signal operator on the remote \ac{PE} must not + cause partial updates. Only concurrent accesses on \VAR{sig\_addr} by + different signal update operations using the same signal update operator are + guaranteed to be exclusive. } \begin{apiexamples} @@ -95,3 +104,4 @@ \end{apiexamples} \end{apidefinition} +\color{black} diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index a0a4ed36b..179da6d26 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -23,10 +23,10 @@ int main(void) uint64_t *data = shmem_calloc(size, sizeof(uint64_t)); if (me == 0) { - shmem_put_signal(data, message, size, &sig_addr, 1, pe); + shmem_put_signal(data, message, size, &sig_addr, 1, SHMEM_SIGNAL_SET, pe); } else { shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); - shmem_put_signal(data, data, size, &sig_addr, 1, pe); + shmem_put_signal(data, data, size, &sig_addr, 1, SHMEM_SIGNAL_SET, pe); } free(message); From c0107e6a0aa62c45362aa3831b65b4e16f9e1fdf Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 17 Apr 2019 15:02:02 -0500 Subject: [PATCH 174/319] Fix typos in library constants, handles sections --- content/library_constants.tex | 2 +- content/library_handles.tex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index b89b85ddd..7e857964b 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -44,7 +44,7 @@ \color{Green} The bitwise flag which specifies that a team creation routine should use the \VAR{num\_contexts} member of the provided -\CTYPE{shmem\_team\_config\_t} configuration parameter as a requirement. +\CTYPE{shmem\_team\_config\_t} configuration parameter as a request. See Sections~\ref{subsec:shmem_team_config_t} and \ref{subsec:shmem_team_split_strided} for more detail about its use. 
\tabularnewline \hline diff --git a/content/library_handles.tex b/content/library_handles.tex index 9ccbd2522..e7c94a647 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -26,7 +26,7 @@ \color{Green} \LibHandleDecl{SHMEM\_TEAM\_SHARED} & \color{Green} -Handle of type \CTYPE{shmem\_team\_t} that corresponds a team of \acp{PE} +Handle of type \CTYPE{shmem\_team\_t} that corresponds to a team of \acp{PE} that share a memory domain. When this handle is used by some \ac{PE}, it will refer to the team of all \acp{PE} that would return a non-null pointer from \FUNC{shmem\_ptr} for symmetric objects on that \ac{PE}, From cf0742b6a3515ffea46a213d988b04280525f6e7 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Wed, 17 Apr 2019 15:19:57 -0500 Subject: [PATCH 175/319] Simplify descriptions of undefined behavior in error checking section --- content/error_handling.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/error_handling.tex b/content/error_handling.tex index c3f0196b6..e6c30a922 100644 --- a/content/error_handling.tex +++ b/content/error_handling.tex @@ -6,8 +6,8 @@ Collective operations involving many \acp{PE} may return values indicating success while other \acp{PE} are still executing the collective operation. By default, return values indicating success of a collective routine on one \ac{PE} do not indicate that all \acp{PE} involved in the collective operation will return from the routine successfully. If a collective routine provides any such global error checking behavior, it will be explicitly stated in the description of that routine. -\openshmem implementations for production environments may do minimal internal error checking for performance reasons. Also, some errors may be undetectable or uncorrectable due to limitations in underlying technologies, resulting in undefined behaviors. 
For these reasons, \openshmem generally considers that when routine parameters are invalid or violate requirements specified in this document, behavior is undefined. Likewise, when ordering or synchronization of communication operations violates requirements specified in this document, behavior is undefined. +If some routine specified in this document does not explicitly state resulting error behavior when a program violates the routine assumptions and requirements, then the behavior is undefined. See Annex \ref{sec:undefined} for more details on undefined behavior in \openshmem. + +\openshmem implementations are encouraged but not required to attempt to continue execution in the face of resource allocation errors, such as lack of network resources or memory resources. In these cases, if resource allocation fails inside a routine with an integer return code, library implementations should return some nonzero value, which may have an implementation-specific definition. If the routine has some other out parameter, such as a pointer to a new memory allocation, routines may specify that the out parameter has some sentinel value to indicate failure to complete the operation. -Applications are encouraged but not required to attempt to continue execution in the face of resource allocations errors, such as lack of network resources or memory resources. In these cases, if resource allocation fails inside a routine with an integer return code, applications should return some nonzero value, which may have implementation specific definition. If the routine has some other out parameter, such as pointer to a new memory allocation, routines may specify that the out parameter has some sentinel value to indicate failure to complete the operation. 
-If some routine specified in this document does not explicity state resulting error behavior when a program violates the routine assumptions and requirements, then the behavior is undefined, and could include continuing execution regardless, aborting the application with an informative message, returning sentinel values in outgoing parameters, launching a debugger, tweeting the failure information as emoji, sounding a rather loud siren, or any other behavior which the implementation might find desirable. From df423c0414a2b0b9fdf82840dabe26ed49daa90f Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Mon, 29 Apr 2019 15:27:16 -0400 Subject: [PATCH 176/319] Fix minor issues: shmem_team_translate_pe example Signed-off-by: David M. Ozog --- example_code/shmem_team_translate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/example_code/shmem_team_translate.c b/example_code/shmem_team_translate.c index d1fb796fd..33e6efbfa 100644 --- a/example_code/shmem_team_translate.c +++ b/example_code/shmem_team_translate.c @@ -9,22 +9,23 @@ int main(int argc, char *argv[]) { - int rank; + int rank, npes; int t_pe; - int t_global; + int t_global; shmem_team_t new_team; shmem_team_config_t *config; shmem_init(); config = NULL; rank = shmem_my_pe(); + npes = shmem_num_pes(); shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, &new_team); if (new_team != SHMEM_TEAM_NULL) { t_pe = shmem_team_my_pe(new_team); - t_global = shmem_team_translate(new_team, t_pe, SHMEM_TEAM_WORLD); + t_global = shmem_team_translate_pe(new_team, t_pe, SHMEM_TEAM_WORLD); if (t_global != rank) { shmem_global_exit(1); From 7c51cbd3a2d5145e6289ef22610efdd0a9f6e990 Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Fri, 3 May 2019 13:46:27 -0400 Subject: [PATCH 177/319] Add wait/test all/any/some API w/vector cmp_values Signed-off-by: David M. 
Ozog --- content/shmem_test_all_vector.tex | 66 +++++++++++++++++ content/shmem_test_any_vector.tex | 71 ++++++++++++++++++ content/shmem_test_some_vector.tex | 84 +++++++++++++++++++++ content/shmem_wait_until_all_vector.tex | 75 +++++++++++++++++++ content/shmem_wait_until_any_vector.tex | 78 ++++++++++++++++++++ content/shmem_wait_until_some_vector.tex | 94 ++++++++++++++++++++++++ main_spec.tex | 17 +++++ 7 files changed, 485 insertions(+) create mode 100644 content/shmem_test_all_vector.tex create mode 100644 content/shmem_test_any_vector.tex create mode 100644 content/shmem_test_some_vector.tex create mode 100644 content/shmem_wait_until_all_vector.tex create mode 100644 content/shmem_wait_until_any_vector.tex create mode 100644 content/shmem_wait_until_some_vector.tex diff --git a/content/shmem_test_all_vector.tex b/content/shmem_test_all_vector.tex new file mode 100644 index 000000000..7baa7f0aa --- /dev/null +++ b/content/shmem_test_all_vector.tex @@ -0,0 +1,66 @@ +\apisummary{ + Indicate whether all variables within an array of variables on the local \ac{PE} meet \oldtext{a} \newtext{the} specified test condition\newtext{s}. +} + +\begin{apidefinition} + +\begin{C11synopsis} +int @\FuncDecl{shmem\_test\_all\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, + TYPE *cmp_values); +\end{C11synopsis} +where \TYPE{} is one of the point-to-point synchronization types specified by +Table \ref{p2psynctypes}. + +\begin{Csynopsis} +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_all\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, + TYPE *cmp_values); +\end{Csynopsis} +where \TYPE{} is one of the point-to-point synchronization types and has a +corresponding \TYPENAME{} specified by Table \ref{p2psynctypes}. 
+ +\begin{apiarguments} + + \apiargument{IN}{ivars}{A pointer to an array of remotely accessible data + objects.} + \apiargument{IN}{nelems}{The number of elements in the \VAR{ivars} array.} + \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} + that indicates which elements in \VAR{ivars} are excluded from the test set.} + \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that + \newtext{respectively} compares elements of \VAR{ivars} with + \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.}} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_test\_all\newtext{\_vector}} routine indicates whether all + entries in the test set specified by \VAR{ivars} and \VAR{status} have + satisfied the test condition at the calling \ac{PE}. This routine does not + block and returns zero if not all entries in \VAR{ivars} satisfied the test + condition\newtext{s}. This routine \newtext{respectively} compares each of + the \VAR{nelems} elements in the \VAR{ivars} array with \oldtext{the} + \newtext{each} value \newtext{in} \VAR{cmp\_value\newtext{s}} according to + the comparison operator \VAR{cmp} at the calling \ac{PE}. If \VAR{nelems} + is 0, the test set is empty and this routine returns 1. + + The optional \VAR{status} is a mask array of length \VAR{nelems} where each element + corresponds to the respective element in \VAR{ivars} and indicates whether + the element is excluded from the test set. Elements of \VAR{status} set to + 0 will be included in the test set, and elements set to 1 will be ignored. If all elements + in \VAR{status} are set to 1 or \VAR{nelems} is 0, the test set is empty + and this routine returns 1. If \VAR{status} is a null pointer, it is + ignored and all elements in \VAR{ivars} are included in the test set. 
The + \VAR{ivars} and \VAR{status} arrays must not overlap in + memory. +} + +\apireturnvalues{ + \FUNC{shmem\_test\_all\newtext{\_vector}} returns 1 if all variables in \VAR{ivars} satisfy the test condition\newtext{s} or if \VAR{nelems} is 0, otherwise this routine returns 0. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_test_any_vector.tex b/content/shmem_test_any_vector.tex new file mode 100644 index 000000000..b46d419cb --- /dev/null +++ b/content/shmem_test_any_vector.tex @@ -0,0 +1,71 @@ +\apisummary{ + Indicate whether any one variable within an array of variables on the local \ac{PE} meets \oldtext{a} \newtext{its} specified test condition. +} + +\begin{apidefinition} + +\begin{C11synopsis} +size_t @\FuncDecl{shmem\_test\_any\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, + TYPE *cmp_values); +\end{C11synopsis} +where \TYPE{} is one of the point-to-point synchronization types specified by +Table \ref{p2psynctypes}. + +\begin{Csynopsis} +size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_any\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, + int cmp, TYPE *cmp_values); +\end{Csynopsis} +where \TYPE{} is one of the point-to-point synchronization types and has a +corresponding \TYPENAME{} specified by Table \ref{p2psynctypes}. 
+ +\begin{apiarguments} + + \apiargument{IN}{ivars}{A pointer to an array of remotely accessible data + objects.} + \apiargument{IN}{nelems}{The number of elements in the \VAR{ivars} array.} + \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} + that indicates which elements in \VAR{ivars} are excluded from the test set.} + \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that + \newtext{respectively} compares elements of \VAR{ivars} with + \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.}} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_test\_any\newtext{\_vector}} routine indicates whether any + entry in the test set specified by \VAR{ivars} and \VAR{status} has + satisfied the test condition at the calling \ac{PE}. This routine does not + block and returns \CONST{SIZE\_MAX} if no entries in \VAR{ivars} satisfied + the test condition. This routine \newtext{respectively} compares each of + the \VAR{nelems} elements in the \VAR{ivars} array with \oldtext{the} + \newtext{each} value \newtext{in} \VAR{cmp\_value\newtext{s}} according to + the comparison operator \VAR{cmp} at the calling \ac{PE}. The order in + which these elements are tested is unspecified. If an entry $i$ in + \VAR{ivars} within the test set satisfies the test condition, a series of + calls to \FUNC{shmem\_test\_any\newtext{\_vector}} must eventually return $i$. + + The optional \VAR{status} is a mask array of length \VAR{nelems} where each + element corresponds to the respective element in \VAR{ivars} and indicates + whether the element is excluded from the test set. Elements of + \VAR{status} set to 0 will be included in the test set, and elements set to + 1 will be ignored. 
If all elements in \VAR{status} are set to 1 or + \VAR{nelems} is 0, the test set is empty and this routine returns + \CONST{SIZE\_MAX}. If \VAR{status} is a null pointer, it is ignored and + all elements in \VAR{ivars} are included in the test set. The \VAR{ivars} + and \VAR{status} arrays must not overlap in memory. +} + +\apireturnvalues{ + \FUNC{shmem\_test\_any\newtext{\_vector}} returns the index of an element in the \VAR{ivars} + array that satisfies the test condition. If the test set is empty or no + conditions in the test set are satisfied, this routine returns \CONST{SIZE\_MAX}. +} + +\apinotes{ + None. +} + + +\end{apidefinition} diff --git a/content/shmem_test_some_vector.tex b/content/shmem_test_some_vector.tex new file mode 100644 index 000000000..ee0ff6279 --- /dev/null +++ b/content/shmem_test_some_vector.tex @@ -0,0 +1,84 @@ +\apisummary{ + Indicate whether at least one variable within an array of variables on the local \ac{PE} meets \oldtext{a} \newtext{its} specified test condition. +} + +\begin{apidefinition} + +\begin{C11synopsis} +size_t @\FuncDecl{shmem\_test\_some\newtext{\_vector}}@(TYPE *ivars, size_t nelems, size_t *indices, const int *status, + int cmp, TYPE *cmp_values); +\end{C11synopsis} +where \TYPE{} is one of the point-to-point synchronization types specified by +Table \ref{p2psynctypes}. + +\begin{Csynopsis} +size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_some\newtext{\_vector}}@(TYPE *ivars, size_t nelems, size_t *indices, + const int *status, int cmp, TYPE *cmp_values); +\end{Csynopsis} +where \TYPE{} is one of the point-to-point synchronization types and has a +corresponding \TYPENAME{} specified by Table \ref{p2psynctypes}. 
+ +\begin{apiarguments} + + \apiargument{IN}{ivars}{A pointer to an array of remotely accessible data + objects.} + \apiargument{IN}{nelems}{The number of elements in the \VAR{ivars} array.} + \apiargument{OUT}{indices}{An array of indices of length at least + \VAR{nelems} into \VAR{ivars} that satisfied the test condition.} + \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} + that indicates which elements in \VAR{ivars} are excluded from the test set.} + \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that + \newtext{respectively} compares elements of \VAR{ivars} with + \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.}} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_test\_some\newtext{\_vector}} routine indicates whether at least one entry + in the test set specified by \VAR{ivars} and \VAR{status} satisfies the + test condition at the calling \ac{PE}. This routine does not block and returns zero if + no entries in \VAR{ivars} satisfied the test condition. This routine + \newtext{respectively} compares each element of the \VAR{ivars} array in the test set with \oldtext{the} \newtext{each} + value \newtext{in} \VAR{cmp\_value\newtext{s}} according to the comparison operator \VAR{cmp} at + the calling \ac{PE}. This routine tests all elements of \VAR{ivars} in the + test set at least once, and the order in which the elements are tested is + unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies + the test condition, a series of calls to \FUNC{shmem\_test\_some\newtext{\_vector}} must + eventually include $i$ in the \VAR{indices} array. + + Upon return, the \VAR{indices} array contains the indices of the elements + in the test set that satisfied the test condition during the call to + \FUNC{shmem\_test\_some\newtext{\_vector}}. 
The return value of + \FUNC{shmem\_test\_some\newtext{\_vector}} is equal to the total number of + these satisfied elements. If the return value is $N$, then the first $N$ + elements of the \VAR{indices} array contain those unique indices that + satisfied the test condition. These first $N$ elements of \VAR{indices} + may be unordered with respect to the corresponding indices of \VAR{ivars}. + The array pointed to by \VAR{indices} must be at least \VAR{nelems} long. + If an entry $i$ in \VAR{ivars} within the test set satisfies the test + condition, a series of calls to \FUNC{shmem\_test\_some\newtext{\_vector}} + must eventually include $i$ in the \VAR{indices} array. + + The optional \VAR{status} is a mask array of length \VAR{nelems} where each element + corresponds to the respective element in \VAR{ivars} and indicates whether + the element is excluded from the test set. Elements of \VAR{status} set to + 0 will be included in the test set, and elements set to 1 will be ignored. If all + elements in \VAR{status} are set to 1 or \VAR{nelems} is 0, the test set is + empty and this routine returns 0. If \VAR{status} is a null pointer, it is ignored and all + elements in \VAR{ivars} are included in the test set. The \VAR{ivars}, + \VAR{indices}, and \VAR{status} arrays must not overlap in memory. +} + +\apireturnvalues{ + \FUNC{shmem\_test\_some\newtext{\_vector}} returns the number of indices returned in + the \VAR{indices} array. If the test set is empty, this routine returns 0. +} + +\apinotes{ + None. +} + + +\end{apidefinition} diff --git a/content/shmem_wait_until_all_vector.tex b/content/shmem_wait_until_all_vector.tex new file mode 100644 index 000000000..2567f1e5a --- /dev/null +++ b/content/shmem_wait_until_all_vector.tex @@ -0,0 +1,75 @@ +\apisummary{ + Wait on an array of variables on the local \ac{PE} until all variables meet the specified wait condition\newtext{s}. 
+} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_wait\_until\_all\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, + TYPE *cmp_values); +\end{C11synopsis} +where \TYPE{} is one of the point-to-point synchronization types specified by +Table \ref{p2psynctypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_all\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); +\end{Csynopsis} +where \TYPE{} is one of the point-to-point synchronization types and has a +corresponding \TYPENAME{} specified by Table~\ref{p2psynctypes}. + +\begin{apiarguments} + + \apiargument{IN}{ivars}{A pointer to an array of remotely accessible data + objects.} + \apiargument{IN}{nelems}{The number of elements in the \VAR{ivars} array.} + \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} + that indicates which elements in \VAR{ivars} are excluded from the wait set.} + \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that + \newtext{respectively} compares elements of \VAR{ivars} with + \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.}} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_wait\_until\_all\_vector} routine waits until all entries + in the wait set specified by \VAR{ivars} and \VAR{status} have satisfied + the wait condition\newtext{s} at the calling \ac{PE}. \newtext{This + routine behaves very similarly to \FUNC{shmem\_wait\_until\_all}, but + supports multiple conditional values in the \VAR{cmp\_values} array.} If + \VAR{nelems} is 0, the wait set is empty and this routine returns + immediately. 
\oldtext{This routine is semantically similar to + \FUNC{shmem\_wait\_until} in Section~\ref{subsec:shmem_wait_until}, but + adds support for point-to-point synchronization involving an array of + symmetric data objects.} + + The optional \VAR{status} is a mask array of length \VAR{nelems} where each + element corresponds to the respective element in \VAR{ivars} and indicates + whether the element is excluded from the wait set. Elements of + \VAR{status} set to 0 will be included in the wait set, and elements set to + 1 will be ignored. If all elements in \VAR{status} are set to 1 or + \VAR{nelems} is 0, the wait set is empty and this routine returns + immediately. If \VAR{status} is a null pointer, it is ignored and + all elements in \VAR{ivars} are included in the wait set. The \VAR{ivars} + and \VAR{status} arrays must not overlap in memory. +} + + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\apiimpnotes{ + Implementations must ensure that \FUNC{shmem\_wait\_until\_all\_vector} does not + return before the update of the memory indicated by \VAR{ivars} is fully + complete. Partial updates to the memory must not cause + \FUNC{shmem\_wait\_until\_all\_vector} to return. +} + + +\end{apidefinition} diff --git a/content/shmem_wait_until_any_vector.tex b/content/shmem_wait_until_any_vector.tex new file mode 100644 index 000000000..a738e8358 --- /dev/null +++ b/content/shmem_wait_until_any_vector.tex @@ -0,0 +1,78 @@ +\apisummary{ + Wait on an array of variables on the local \ac{PE} until any one variable meets \oldtext{the} \newtext{its} specified wait condition. +} + +\begin{apidefinition} + +\begin{C11synopsis} +size_t @\FuncDecl{shmem\_wait\_until\_any\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, + TYPE *cmp_values); +\end{C11synopsis} +where \TYPE{} is one of the point-to-point synchronization types specified by +Table \ref{p2psynctypes}. 
+ +\begin{Csynopsis} +size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_any\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, + int cmp, TYPE *cmp_values); +\end{Csynopsis} +where \TYPE{} is one of the point-to-point synchronization types and has a +corresponding \TYPENAME{} specified by Table~\ref{p2psynctypes}. + +\begin{apiarguments} + + \apiargument{IN}{ivars}{A pointer to an array of remotely accessible data + objects.} + \apiargument{IN}{nelems}{The number of elements in the \VAR{ivars} array.} + \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} + that indicates which elements in \VAR{ivars} are excluded from the wait set.} + \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that + \newtext{respectively} compares elements of \VAR{ivars} with + \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.}} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_wait\_until\_any\_vector} routine waits until any one + entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies + the wait condition at the calling \ac{PE}. \newtext{This + routine behaves very similarly to \FUNC{shmem\_wait\_until\_any}, but + supports multiple conditional values in the \VAR{cmp\_values} array.} The + order in which these elements are waited upon is unspecified. If an entry + $i$ in \VAR{ivars} within the wait set satisfies the wait condition, a + series of calls to \FUNC{shmem\_wait\_until\_any\_vector} must eventually + return $i$. + + The optional \VAR{status} is a mask array of length \VAR{nelems} where each + element corresponds to the respective element in \VAR{ivars} and indicates + whether the element is excluded from the wait set. Elements of + \VAR{status} set to 0 will be included in the wait set, and elements set to + 1 will be ignored. 
If all elements in \VAR{status} are set to 1 or + \VAR{nelems} is 0, the wait set is empty and this routine returns + \CONST{SIZE\_MAX}. If + \VAR{status} is a null pointer, it is ignored and all elements in + \VAR{ivars} are included in the wait set. The \VAR{ivars} and \VAR{status} + arrays must not overlap in memory. +} + +\apireturnvalues{ + \FUNC{shmem\_wait\_until\_any\_vector} returns the index of an element in the + \VAR{ivars} array that satisfies the wait condition. If the wait set is + empty, this routine returns \CONST{SIZE\_MAX}. +} + +\apinotes{ + None. +} + +\apiimpnotes{ + Implementations must ensure that \FUNC{shmem\_wait\_until\_any\newtext{\_vector}} does not + return before the update of the memory indicated by the completed index of \VAR{ivars} is fully + executed. Partial updates to the memory must not cause + \FUNC{shmem\_wait\_until\_any\newtext{\_vector}} to return. +} + + +\end{apidefinition} + diff --git a/content/shmem_wait_until_some_vector.tex b/content/shmem_wait_until_some_vector.tex new file mode 100644 index 000000000..07796be28 --- /dev/null +++ b/content/shmem_wait_until_some_vector.tex @@ -0,0 +1,94 @@ +\apisummary{ + Wait on an array of variables on the local \ac{PE} until at least one variable meets \oldtext{the} \newtext{its} specified wait condition. +} + +\begin{apidefinition} + +\begin{C11synopsis} +size_t @\FuncDecl{shmem\_wait\_until\_some\newtext{\_vector}}@(TYPE *ivars, size_t nelems, size_t *indices, + const int *status, int cmp, TYPE *cmp_values); +\end{C11synopsis} +where \TYPE{} is one of the point-to-point synchronization types specified by +Table \ref{p2psynctypes}. 
+ +\begin{Csynopsis} +size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_some\newtext{\_vector}}@(TYPE *ivars, size_t nelems, size_t *indices, + const int *status, int cmp, TYPE *cmp_values); +\end{Csynopsis} +where \TYPE{} is one of the point-to-point synchronization types and has a +corresponding \TYPENAME{} specified by Table~\ref{p2psynctypes}. + +\begin{apiarguments} + + \apiargument{IN}{ivars}{A pointer to an array of remotely accessible data + objects.} + \apiargument{IN}{nelems}{The number of elements in the \VAR{ivars} array.} + \apiargument{OUT}{indices}{An array of indices of length at least + \VAR{nelems} into \VAR{ivars} that satisfied the wait condition.} + \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} + that indicates which elements in \VAR{ivars} are excluded from the wait set.} + \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that + \newtext{respectively} compares elements of \VAR{ivars} with + \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.}} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} routine waits until + at least one entry in the wait set specified by \VAR{ivars} and + \VAR{status} satisfies the wait condition at the calling \ac{PE}. + \newtext{This routine behaves very similarly to + \FUNC{shmem\_wait\_until\_some}, but supports multiple conditional values in + the \VAR{cmp\_values} array.} This routine tests all elements of + \VAR{ivars} in the wait set at least once, and the order in which the + elements are waited upon is unspecified. + + Upon return, the \VAR{indices} array contains the indices of at least one + element in the wait set that satisfied the wait condition during the call + to \FUNC{shmem\_wait\_until\_some\newtext{\_vector}}. 
The return value of + \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} is equal to the total + number of these satisfied elements. For a given return value $N$, the + first $N$ elements of the \VAR{indices} array contain those unique indices + that satisfied the wait condition. These first $N$ elements of + \VAR{indices} may be unordered with respect to the corresponding indices of + \VAR{ivars}. The array pointed to by \VAR{indices} must be at least + \VAR{nelems} long. If an entry $i$ in \VAR{ivars} within the wait set + satisfies the wait condition, a series of calls to + \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} must eventually include + $i$ in the \VAR{indices} array. + + The optional \VAR{status} is a mask array of length \VAR{nelems} where each + element corresponds to the respective element in \VAR{ivars} and indicates + whether the element is excluded from the wait set. Elements of + \VAR{status} set to 0 will be included in the wait set, and elements set to + 1 will be ignored. If all elements in \VAR{status} are set to 1 or + \VAR{nelems} is 0, the wait set is empty and this routine returns 0. + If \VAR{status} is a null pointer, it is ignored + and all elements in \VAR{ivars} are included in the wait set. The + \VAR{ivars}, \VAR{indices}, and \VAR{status} arrays must not overlap in + memory. +} + + +\apireturnvalues{ + \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} returns the number of + indices returned in the \VAR{indices} array. If the wait set is empty, this + routine returns 0. +} + +\apinotes{ + None. +} + +\apiimpnotes{ + Implementations must ensure that + \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} does not return before + the update of the memory indicated by the completed indices of \VAR{ivars} + is fully executed. Partial updates to the memory must not cause + \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} to return. 
+} + + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index 1714cd28f..21be0fae5 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -301,6 +301,15 @@ \subsubsection{\textbf{SHMEM\_WAIT\_UNTIL\_ANY}}\label{subsec:shmem_wait_until_a \subsubsection{\textbf{SHMEM\_WAIT\_UNTIL\_SOME}}\label{subsec:shmem_wait_until_some} \input{content/shmem_wait_until_some.tex} +\subsubsection{\textbf{SHMEM\_WAIT\_UNTIL\_ALL\_VECTOR}}\label{subsec:shmem_wait_until_all_vector} +\input{content/shmem_wait_until_all_vector.tex} + +\subsubsection{\textbf{SHMEM\_WAIT\_UNTIL\_ANY\_VECTOR}}\label{subsec:shmem_wait_until_any_vector} +\input{content/shmem_wait_until_any_vector.tex} + +\subsubsection{\textbf{SHMEM\_WAIT\_UNTIL\_SOME\_VECTOR}}\label{subsec:shmem_wait_until_some_vector} +\input{content/shmem_wait_until_some_vector.tex} + \subsubsection{\textbf{SHMEM\_TEST}}\label{subsec:shmem_test} \input{content/shmem_test.tex} @@ -313,6 +322,14 @@ \subsubsection{\textbf{SHMEM\_TEST\_ANY}}\label{subsec:shmem_test_any} \subsubsection{\textbf{SHMEM\_TEST\_SOME}}\label{subsec:shmem_test_some} \input{content/shmem_test_some.tex} +\subsubsection{\textbf{SHMEM\_TEST\_ALL\_VECTOR}}\label{subsec:shmem_test_all_vector} +\input{content/shmem_test_all_vector.tex} + +\subsubsection{\textbf{SHMEM\_TEST\_ANY\_VECTOR}}\label{subsec:shmem_test_any_vector} +\input{content/shmem_test_any_vector.tex} + +\subsubsection{\textbf{SHMEM\_TEST\_SOME\_VECTOR}}\label{subsec:shmem_test_some_vector} +\input{content/shmem_test_some_vector.tex} From 3d300e9f10af05057127f23292fb3673d9f0e3e3 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 15:59:26 -0500 Subject: [PATCH 178/319] Update put-with-signal operation Move the atomicity semantics to the API description section --- content/shmem_put_signal.tex | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index c6eef2e00..fed9870c3 100644 --- 
a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -40,9 +40,8 @@ \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote - \ac{PE} as the signal. This signal data object must be - remotely accessible.} - \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the + \ac{PE} as the signal. This signal data object must be remotely accessible.} + \apiargument{IN}{signal}{Unsigned 64-bit value that is used to update the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{sig\_op}{Signal operator that represents the type of update to be performed to the remote \VAR{sig\_addr} signal data object.} @@ -61,6 +60,12 @@ of signal update based on the \VAR{sig\_op} signal operator using the \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. + + The signal update by the put-with-signal routine is compatible with all + point-to-point synchronization interfaces. The delivery of \VAR{signal} flag + based on the remote \ac{PE} must not cause partial updates. Only concurrent + accesses on \VAR{sig\_addr} by different put-with-signal operations using + the same signal update operator is guaranteed to be exclusive. } \apireturnvalues{ @@ -83,13 +88,6 @@ completion of the signal update in a sequence consisting of a put routine followed by a put-with-signal routine does not imply delivery of the put routine's data. - - The signal update by the put-with-signal routines is compatible with all - point-to-point synchronization interfaces. The delivery of \VAR{signal} flag - based on the \VAR{sig\_op} signal operator on the remote \ac{PE} must not - cause partial updates. 
Only concurrent accesses on \VAR{sig\_addr} by - different signal update operations using the same signal update operator is - guaranteed to be exclusive. } \begin{apiexamples} From cee7f53a252ff6684c6684d99f5a5cc9415388d6 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 17:09:00 -0500 Subject: [PATCH 179/319] Reframe the atomicity guarantees for p-w-s Changing the text to confirm the atomicity guarantees of the put with signal operation. The signal update is atomic only with respect to itself, and other put-with-signal of the same operator, and any point-to-point synchronization routines --- content/shmem_put_signal.tex | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index fed9870c3..95b7186c7 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -2,7 +2,7 @@ \apisummary{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently update a remote flag to signal completion. + and subsequently updating a remote flag to signal completion. } \begin{apidefinition} @@ -34,24 +34,24 @@ \apiargument{IN}{ctx}{A context handle specifying the context on which to perform the operation. When this argument is not provided, the operation is performed on the default context.} - \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This - data object must be remotely accessible.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. + This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote \ac{PE} as the signal. 
This signal data object must be remotely accessible.} - \apiargument{IN}{signal}{Unsigned 64-bit value that is used to update the + \apiargument{IN}{signal}{Unsigned 64-bit value that is used for updating the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{sig\_op}{Signal operator that represents the type of update - to be performed to the remote \VAR{sig\_addr} signal data object.} + to be performed on the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently update a remote flag to signal completion. The routines + and subsequently updating a remote flag to signal completion. The routines return after the data has been copied out of the \source{} array on the local \ac{PE}. @@ -61,11 +61,11 @@ \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. - The signal update by the put-with-signal routine is compatible with all - point-to-point synchronization interfaces. The delivery of \VAR{signal} flag - based on the remote \ac{PE} must not cause partial updates. Only concurrent - accesses on \VAR{sig\_addr} by different put-with-signal operations using - the same signal update operator is guaranteed to be exclusive. + An update to the \VAR{sig\_addr} signal data object through a put-with-signal + routine completes as if performed atomically with respect to any other + put-with-signal routine that updates the \VAR{sig\_addr} signal data object + using the same \VAR{sig\_op} signal update operator and any point-to-point + synchronization routine that accesses the \VAR{sig\_addr} signal data object. 
} \apireturnvalues{ @@ -78,7 +78,7 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. + \VAR{sig\_addr} and \dest{} may not be overlapping in memory. The completion of signal update using the \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words From 3eb7c0b6bbccdf505e1bf65a6fe3a68fd398e6a3 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 10 Sep 2018 13:46:29 -0500 Subject: [PATCH 180/319] Add support for NBI put-with-signal NBI put-with-signal is an extension to its blocking variant. --- content/shmem_put_signal_nbi.tex | 84 ++++++++++++++++++++++++++++++++ main_spec.tex | 3 ++ utils/defs.tex | 12 +++-- 3 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 content/shmem_put_signal_nbi.tex diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex new file mode 100644 index 000000000..4d416a1e9 --- /dev/null +++ b/content/shmem_put_signal_nbi.tex @@ -0,0 +1,84 @@ +\color{Green} +\apisummary{ + The nonblocking put-with-signal routines provide a method for copying data + from a contiguous local data object to a data object on a specified \ac{PE} + and subsequently setting a remote flag to signal completion. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
+ +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. + +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +\end{CsynopsisCol} +where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. + +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +\end{CsynopsisCol} + +\begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This + data object must be remotely accessible.} + \apiargument{IN}{source}{Data object containing the data to be copied.} + \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} + \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote + \ac{PE} as the signal. 
This signal data object must be + remotely accessible.} + \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the + remote \VAR{sig\_addr} signal data object.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} +\end{apiarguments} + +\apidescription{ + The routines return after posting the operation. The operation is considered + complete after the subsequent call to \FUNC{shmem\_quiet}. At the completion + of \FUNC{shmem\_quiet}, the data has been copied out of the \VAR{source} + array on the local \ac{PE} and delivered into the \VAR{dest} array on the + destination \ac{PE}. The delivery of \VAR{signal} flag on the remote + \ac{PE} indicates the delivery of its corresponding \VAR{dest} data words + into the data object on the remote \ac{PE}. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + The \VAR{dest} and \VAR{sig\_addr} data objects must both be remotely + accessible. The \VAR{sig\_addr} and \VAR{dest} could be of different kinds, + for example, one could be a global/static \Cstd variable and the other could + be allocated on the symmetric heap. + + The restrict qualifier in \VAR{sig\_addr} expects the data object to be + distinct from \VAR{dest} and \VAR{source} data objects. + + The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the + delivery of its corresponding \VAR{dest} data words into the data object on + the remote \ac{PE}. Without a memory-ordering operation, there is no implied + ordering between the delivery of the signal word of a nonblocking + put-with-signal routine and another data transfer. For example, the delivery + of the signal word in a sequence consisting of a put routine followed by a + nonblocking put-with-signal routine does not imply delivery of the put + routine's data. 
+} + +\end{apidefinition} +\color{Black} diff --git a/main_spec.tex b/main_spec.tex index 2f8e7c4c1..e90a27e0c 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -188,6 +188,9 @@ \subsection{Non-blocking Remote Memory Access Routines}\label{sec:rma_nbi} \subsubsection{\textbf{SHMEM\_PUT\_NBI}}\label{subsec:shmem_put_nbi} \input{content/shmem_put_nbi.tex} +\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL\_NBI}}\label{subsec:shmem_put_signal_nbi} +\input{content/shmem_put_signal_nbi.tex} + \subsubsection{\textbf{SHMEM\_GET\_NBI}}\label{subsec:shmem_get_nbi} \input{content/shmem_get_nbi.tex} diff --git a/utils/defs.tex b/utils/defs.tex index 7772da02d..258272db5 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -408,8 +408,8 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, - uint64_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, uint64_t, + restrict, shmem_ctx_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} @@ -426,8 +426,13 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, +<<<<<<< cee7f53a252ff6684c6684d99f5a5cc9415388d6 morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, +======= + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, uint64_t, + restrict, shmem_ctx_t}, +>>>>>>> Add support for NBI put-with-signal aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} @@ -436,7 +441,8 @@ \color{red} {\lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, uint64_t, + restrict, shmem_ctx_t}, aboveskip=0pt, belowskip=0pt}}}{} \lstnewenvironment{Fsynopsis} From 47594544921d64081876fd6ea2f09790dabfdbcc Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 
17:24:58 -0500 Subject: [PATCH 181/319] Implement review comments similar to blocking put-with-signal We have incorporated common review comments from put-with-signal blocking routines: 1. duplicated explanation from summary to description 2. removed restrict qualifier and also overlapping explanation 3. modified ctx arg explanation --- content/shmem_put_signal_nbi.tex | 34 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 4d416a1e9..0cbbbbd9b 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -8,32 +8,32 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. When this argument is not provided, the operation is + performed on the default context.} \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} @@ -48,9 +48,12 @@ \end{apiarguments} \apidescription{ - The routines return after posting the operation. The operation is considered - complete after the subsequent call to \FUNC{shmem\_quiet}. At the completion - of \FUNC{shmem\_quiet}, the data has been copied out of the \VAR{source} + The nonblocking put-with-signal routines provide a method for copying data + from a contiguous local data object to a data object on a specified \ac{PE} + and subsequently setting a remote flag to signal completion. The routines + return after posting the operation. The operation is considered complete + after the subsequent call to \FUNC{shmem\_quiet}. 
At the completion of + \FUNC{shmem\_quiet}, the data has been copied out of the \VAR{source} array on the local \ac{PE} and delivered into the \VAR{dest} array on the destination \ac{PE}. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \VAR{dest} data words @@ -67,9 +70,6 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The restrict qualifier in \VAR{sig\_addr} expects the data object to be - distinct from \VAR{dest} and \VAR{source} data objects. - The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \VAR{dest} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied From 5c31dc937ec5c01c3fb5d822b67f3ad5f77bbb46 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 18 Oct 2018 12:33:52 -0500 Subject: [PATCH 182/319] Adding overlapping semantics in put-with-signal-nbi --- content/shmem_put_signal_nbi.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 0cbbbbd9b..3d99679e5 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -70,6 +70,8 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. + The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. + The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \VAR{dest} data words into the data object on the remote \ac{PE}. 
Without a memory-ordering operation, there is no implied From c8966106092f0b71da4f7778fd4b954211175d23 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Tue, 8 Jan 2019 13:40:08 -0600 Subject: [PATCH 183/319] Explicitly state the NBI signal update is AMO Based on recent review comments, it looks like it would be more clear if we state that the signal update is an atomic operation We have added this as part of the Notes to Implementers section. --- content/shmem_put_signal_nbi.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 3d99679e5..8f7279ffa 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -82,5 +82,12 @@ routine's data. } +\apiimpnotes{ + Implementations must ensure that put-with-signal routines are compatible + with all point-to-point synchronization interfaces. The delivery of + \signal{} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \signal{} flag to be an atomic memory operation. +} + \end{apidefinition} \color{Black} From 11682c7ba6d1c090f85d8cb8f26f60769e4dc477 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Tue, 8 Jan 2019 13:44:18 -0600 Subject: [PATCH 184/319] Fix variable usage in NBI notes section --- content/shmem_put_signal_nbi.tex | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 8f7279ffa..38f19b75c 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ The nonblocking put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} @@ -85,9 +84,8 @@ \apiimpnotes{ Implementations must ensure that put-with-signal routines are compatible with all point-to-point synchronization interfaces. 
The delivery of - \signal{} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \signal{} flag to be an atomic memory operation. + \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \VAR{signal} flag to be an atomic memory operation. } \end{apidefinition} -\color{Black} From 5950fbbbf5661594cc3287fd3a9ef9e14024bc72 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Wed, 9 Jan 2019 14:37:06 -0600 Subject: [PATCH 185/319] Move NBI put-with-signal apiimpnotes to apinotes Previously, we had the information about the signal update's atomicity guarantees in the notes to implementors section for NBI put-with-signal. We are now moving this into the main notes section. We have also clarified the atomicity guarantees by referring to the atomicity section. --- content/shmem_put_signal_nbi.tex | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 38f19b75c..ff5439f89 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -79,13 +79,12 @@ of the signal word in a sequence consisting of a put routine followed by a nonblocking put-with-signal routine does not imply delivery of the put routine's data. -} -\apiimpnotes{ - Implementations must ensure that put-with-signal routines are compatible - with all point-to-point synchronization interfaces. The delivery of - \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \VAR{signal} flag to be an atomic memory operation. + The nonblocking put-with-signal routines are compatible with all + point-to-point synchronization interfaces. The delivery of \VAR{signal} flag + on the remote \ac{PE} must not cause partial updates. 
This requires the + update on \VAR{signal} flag to be an atomic operation, with atomicity + guarantees described in Section~\ref{subsec:amo_guarantees}. } \end{apidefinition} From c7b0db3e2d945bc0a09bc52810321103ef517637 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 12:45:22 -0600 Subject: [PATCH 186/319] Add backmatter for NBI put-with-signal --- content/backmatter.tex | 3 +++ 1 file changed, 3 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 915d9947a..f50eeb331 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -513,6 +513,9 @@ \section{Version 1.5} \item Added support for blocking put-with-signal functions. \\ See Section \ref{subsec:shmem_put_signal}. % +\item Added support for nonblocking put-with-signal functions. +\\ See Section \ref{subsec:shmem_put_signal_nbi}. +% \item Specified the validity of communication contexts, added the constant \CONST{SHMEM\_CTX\_INVALID}, and clarified the behavior of \FUNC{shmem\_ctx\_*} routines on invalid contexts. From 02353121b422526ae5134e2c4aaaec0cc5a8e769 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 12:51:54 -0600 Subject: [PATCH 187/319] RM restrict qualifier from defs.tex Previously, we used the restrict qualifier and defined it in defs.tex for syntax highlighting in the function definitions. As the usage of the restrict qualifier is removed, this change is no longer needed. 
--- utils/defs.tex | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/utils/defs.tex b/utils/defs.tex index 258272db5..e18a2e10d 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -408,8 +408,8 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, uint64_t, - restrict, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} @@ -426,13 +426,8 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, -<<<<<<< cee7f53a252ff6684c6684d99f5a5cc9415388d6 morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - uint64_t}, -======= - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, uint64_t, - restrict, shmem_ctx_t}, ->>>>>>> Add support for NBI put-with-signal + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} @@ -441,8 +436,8 @@ \color{red} {\lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, uint64_t, - restrict, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + uint64_t}, aboveskip=0pt, belowskip=0pt}}}{} \lstnewenvironment{Fsynopsis} From e6a7bb293f0d823f9e2287c52a53335af652d5fd Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 13:03:10 -0600 Subject: [PATCH 188/319] Fix \VAR and macro usage correctly We were incorrectly using variable and macros incorrectly for \dest and \source. Fixing it in put-with-signal-nbi. 
--- content/shmem_put_signal_nbi.tex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index ff5439f89..868887b52 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -36,7 +36,7 @@ \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} - \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote \ac{PE} as the signal. This signal data object must be @@ -52,10 +52,10 @@ and subsequently setting a remote flag to signal completion. The routines return after posting the operation. The operation is considered complete after the subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, the data has been copied out of the \VAR{source} - array on the local \ac{PE} and delivered into the \VAR{dest} array on the + \FUNC{shmem\_quiet}, the data has been copied out of the \source{} + array on the local \ac{PE} and delivered into the \dest{} array on the destination \ac{PE}. The delivery of \VAR{signal} flag on the remote - \ac{PE} indicates the delivery of its corresponding \VAR{dest} data words + \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. } @@ -64,15 +64,15 @@ } \apinotes{ - The \VAR{dest} and \VAR{sig\_addr} data objects must both be remotely - accessible. The \VAR{sig\_addr} and \VAR{dest} could be of different kinds, + The \dest{} and \VAR{sig\_addr} data objects must both be remotely + accessible. 
The \VAR{sig\_addr} and \dest{} could be of different kinds, for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. + The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the - delivery of its corresponding \VAR{dest} data words into the data object on + delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied ordering between the delivery of the signal word of a nonblocking put-with-signal routine and another data transfer. For example, the delivery From d08fd172934db542e2457040c03cd9029f21f538 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 14 Jan 2019 22:25:32 -0600 Subject: [PATCH 189/319] Reframe NBI signal-put compatibility with p2p syncs --- content/shmem_put_signal_nbi.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 868887b52..996d8a4f2 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -80,11 +80,11 @@ nonblocking put-with-signal routine does not imply delivery of the put routine's data. - The nonblocking put-with-signal routines are compatible with all - point-to-point synchronization interfaces. The delivery of \VAR{signal} flag - on the remote \ac{PE} must not cause partial updates. This requires the - update on \VAR{signal} flag to be an atomic operation, with atomicity - guarantees described in Section~\ref{subsec:amo_guarantees}. + The signal set by the nonblocking put-with-signal routines is compatible + with all point-to-point synchronization interfaces. The delivery of + \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. 
This + requires the update on \VAR{signal} flag to be an atomic operation, with + atomicity guarantees described in Section~\ref{subsec:amo_guarantees}. } \end{apidefinition} From 8ef94d43709f2223842226ab66c9e5bb3112df83 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 17:30:42 -0500 Subject: [PATCH 190/319] Update NBI put-with-signal atomicity description --- content/shmem_put_signal_nbi.tex | 75 ++++++++++++++++---------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 996d8a4f2..9673c7d21 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -1,62 +1,75 @@ +\color{ForestGreen} \apisummary{ The nonblocking put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently setting a remote flag to signal completion. + and subsequently updating a remote flag to signal completion. } \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{CsynopsisCol} \begin{apiarguments} \apiargument{IN}{ctx}{A context handle specifying the context on which to perform the operation. When this argument is not provided, the operation is performed on the default context.} - \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This - data object must be remotely accessible.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. + This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote - \ac{PE} as the signal. This signal data object must be - remotely accessible.} - \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the + \ac{PE} as the signal. 
This signal data object must be remotely accessible.} + \apiargument{IN}{signal}{Unsigned 64-bit value that is used for updating the remote \VAR{sig\_addr} signal data object.} + \apiargument{IN}{sig\_op}{Signal operator that represents the type of update + to be performed on the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ The nonblocking put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently setting a remote flag to signal completion. The routines - return after posting the operation. The operation is considered complete - after the subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, the data has been copied out of the \source{} - array on the local \ac{PE} and delivered into the \dest{} array on the - destination \ac{PE}. The delivery of \VAR{signal} flag on the remote - \ac{PE} indicates the delivery of its corresponding \dest{} data words - into the data object on the remote \ac{PE}. + and subsequently updating a remote flag to signal completion. + + The routines return after posting the operation. The operation is considered + complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion + of \FUNC{shmem\_quiet}, the data has been copied out of the \source{} array + on the local \ac{PE} and delivered into the \dest{} array on the destination + \ac{PE}. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates + the delivery of its corresponding \dest{} data words into the data object on + the remote \ac{PE}. + + The \VAR{sig\_op} signal operator determines the type of update to be + performed on the remote \VAR{sig\_addr} signal data object. 
+ + An update to the \VAR{sig\_addr} signal data object through a non-blocking + put-with-signal routine completes as if performed atomically with respect to + any other non-blocking put-with-signal routine that updates the + \VAR{sig\_addr} signal data object using the same \VAR{sig\_op} signal + update operator and any point-to-point synchronization routine that accesses + the \VAR{sig\_addr} signal data object. } \apireturnvalues{ @@ -69,22 +82,8 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. - - The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the - delivery of its corresponding \dest{} data words into the data object on - the remote \ac{PE}. Without a memory-ordering operation, there is no implied - ordering between the delivery of the signal word of a nonblocking - put-with-signal routine and another data transfer. For example, the delivery - of the signal word in a sequence consisting of a put routine followed by a - nonblocking put-with-signal routine does not imply delivery of the put - routine's data. - - The signal set by the nonblocking put-with-signal routines is compatible - with all point-to-point synchronization interfaces. The delivery of - \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \VAR{signal} flag to be an atomic operation, with - atomicity guarantees described in Section~\ref{subsec:amo_guarantees}. + \VAR{sig\_addr} and \dest{} may not be overlapping in memory. 
} \end{apidefinition} +\color{black} From 43a2d11e8cf02f7c338c631e7c6c25ce81df8435 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 17:50:22 -0500 Subject: [PATCH 191/319] Combine blk and nbi put-with-signal --- content/library_constants.tex | 6 ++++-- content/shmem_put_signal.tex | 7 ++++--- content/shmem_put_signal_nbi.tex | 8 ++++---- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index 754081a5d..df9835ca3 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -71,14 +71,16 @@ \LibConstDecl{SHMEM\_SIGNAL\_SET} & \color{ForestGreen} An integer constant expression corresponding to the signal update set operation. -See Section~\ref{subsec:shmem_put_signal} for more detail about its use. +See Section~\ref{subsec:shmem_put_signal} and +Section~\ref{subsec:shmem_put_signal_nbi} for more detail about its use. \tabularnewline \hline %% \color{ForestGreen} \LibConstDecl{SHMEM\_SIGNAL\_ADD} & \color{ForestGreen} An integer constant expression corresponding to the signal update add operation. -See Section~\ref{subsec:shmem_put_signal} for more detail about its use. +See Section~\ref{subsec:shmem_put_signal} and +Section~\ref{subsec:shmem_put_signal_nbi} for more detail about its use. \tabularnewline \hline %% \LibConstDecl{SHMEM\_SYNC\_VALUE} diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 95b7186c7..f56892d47 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -63,9 +63,10 @@ An update to the \VAR{sig\_addr} signal data object through a put-with-signal routine completes as if performed atomically with respect to any other - put-with-signal routine that updates the \VAR{sig\_addr} signal data object - using the same \VAR{sig\_op} signal update operator and any point-to-point - synchronization routine that accesses the \VAR{sig\_addr} signal data object. 
+ blocking or non-blocking variant of the put-with-signal routine that updates + the \VAR{sig\_addr} signal data object using the same \VAR{sig\_op} signal + update operator and any point-to-point synchronization routine that accesses + the \VAR{sig\_addr} signal data object. } \apireturnvalues{ diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 9673c7d21..e9782490e 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -66,10 +66,10 @@ An update to the \VAR{sig\_addr} signal data object through a non-blocking put-with-signal routine completes as if performed atomically with respect to - any other non-blocking put-with-signal routine that updates the - \VAR{sig\_addr} signal data object using the same \VAR{sig\_op} signal - update operator and any point-to-point synchronization routine that accesses - the \VAR{sig\_addr} signal data object. + any other blocking or non-blocking variant of the put-with-signal routine + that updates the \VAR{sig\_addr} signal data object using the same + \VAR{sig\_op} signal update operator and any point-to-point synchronization + routine that accesses the \VAR{sig\_addr} signal data object. } \apireturnvalues{ From 41d1819e76e0f6b579e2284201cf2930689bd518 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 18:17:58 -0500 Subject: [PATCH 192/319] Describe PWS nbi ordering --- content/shmem_put_signal_nbi.tex | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index e9782490e..03b0619ce 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -57,9 +57,14 @@ complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the data has been copied out of the \source{} array on the local \ac{PE} and delivered into the \dest{} array on the destination - \ac{PE}. 
The delivery of \VAR{signal} flag on the remote \ac{PE} indicates - the delivery of its corresponding \dest{} data words into the data object on - the remote \ac{PE}. + \ac{PE}. + + The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the + delivery of its corresponding \dest{} data words into the data object on the + remote \ac{PE}. Furthermore, two successive non-blocking put-with-signal + routines, or a non-blocking put-with-signal routine with another data + transfer may deliver data out of order unless a call to \FUNC{shmem\_fence} + is introduced between the two calls. The \VAR{sig\_op} signal operator determines the type of update to be performed on the remote \VAR{sig\_addr} signal data object. From 3fad257342b3b7ba17021a7ce5f7b190037b65b3 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 18:36:22 -0500 Subject: [PATCH 193/319] Update wait_until to return value We are updating wait until to return the value of the ivar for which the wait condition is satisfied --- content/shmem_wait_until.tex | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index e5943b468..d815b00ba 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -5,13 +5,13 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); +TYPE @\FuncDecl{shmem\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); \end{C11synopsis} where \TYPE{} is one of the point-to-point synchronization types specified by Table \ref{p2psynctypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); \end{Csynopsis} where \TYPE{} is one of the point-to-point synchronization types and has a corresponding \TYPENAME{} specified by Table~\ref{p2psynctypes}. @@ -39,7 +39,7 @@ \begin{apiarguments} \apiargument{OUT}{ivar}{A remotely accessible integer variable. When using \CorCpp, - the type of \VAR{ivar} should match that implied in the SYNOPSIS section.} + the type of \VAR{ivar} should match that implied in the SYNOPSIS section.} \apiargument{IN}{cmp}{The compare operator that compares \VAR{ivar} with \VAR{cmp\_value}. When using \Fortran, it must be of default kind. When using \CorCpp, it must be of type \CTYPE{int}.} @@ -50,7 +50,7 @@ \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_wait} and \FUNC{shmem\_wait\_until} operations block until the value contained in the symmetric data object, \VAR{ivar}, at the calling \ac{PE} satisfies the wait condition. In an \openshmem program @@ -80,9 +80,13 @@ \apitablerow{shmem\_int4\_wait, shmem\_int4\_wait\_until}{INTEGER*4} \apitablerow{shmem\_int8\_wait, shmem\_int8\_wait\_until}{INTEGER*8} +\color{ForestGreen} \apireturnvalues{ - None. + Returns a single element of type specified in the symopsis. It is the value + contained in the symmetric data object, \VAR{ivar}, at the calling \ac{PE} + that satisfies the wait condition. 
} +\color{Black} \apinotes{ As of \openshmem[1.4], the \FUNC{shmem\_wait} routine is deprecated; From 0312cd3d19d8e6fd71ee2a7d4d289d711de4e446 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 19:49:40 -0500 Subject: [PATCH 194/319] Add support to provide hints for p2p sync We are adding support to provide hints for the p2p sync routines --- content/library_constants.tex | 11 +++++++++++ content/p2p_sync_intro.tex | 21 +++++++++++++++++++++ content/shmem_test.tex | 4 +++- content/shmem_wait_until.tex | 13 ++++++++----- 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index df9835ca3..05a082d9f 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -286,4 +286,15 @@ See Section~\ref{subsec:p2p_intro} for more detail about its use. \tabularnewline \hline %% +\color{ForestGreen} +\LibConstDecl{SHMEM\_SIGNAL\_UPDATE} +& +\color{ForestGreen} +An integer constant expression corresponding to a hint, specifying an update to +be expected as a signal on a variable on the local \ac{PE} through \openshmem +routines described in Section~\ref{subsec:shmem_put_signal} and +Section~\ref{subsec:shmem_put_signal_nbi}. See Section~\ref{subsec:p2p_intro} +for more detail about its use. +\tabularnewline \hline +%% \end{longtable} diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index c2a2e1dbd..0c824a4fa 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -67,3 +67,24 @@ \label{p2p-consts} \end{center} \end{table} + +\color{ForestGreen} +The point-to-point synchronization interface provides support for passing hints +to specify the type of \openshmem routine that is expected to update a variable +on the local \ac{PE} on which the point-to-point synchronization operation is +performed. The hints are named constants whose values are integer constant +expressions. 
The hint names and associated \openshmem routines that are expected +to update the variable on the local \ac{PE} are presented in Table~\ref{p2p-hints}. +\begin{table}[h] + \begin{center} + \begin{tabular}{ll} + \hline + Hint Name & Associated \openshmem routines \\ \hline + \LibConstRef{SHMEM\_SIGNAL\_UPDATE} & Section~\ref{subsec:shmem_put_signal} + and Section~\ref{subsec:shmem_put_signal_nbi} \\ \hline + \end{tabular} + \TableCaptionRef{Point-to-Point Hint Constants} + \label{p2p-hints} + \end{center} +\end{table} +\color{Black} diff --git a/content/shmem_test.tex b/content/shmem_test.tex index a7dd67f6d..aecea2b2b 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -20,7 +20,9 @@ \apiargument{OUT}{ivar}{A pointer to a remotely accessible data object.} \apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with - \VAR{cmp\_value}.} + \VAR{cmp\_value}.\color{ForestGreen}The comparison operator may be used with + a point-to-point hint constants by combining them with a bitwise OR + operation.\color{Black}} \apiargument{IN}{cmp\_value}{The value against which the object pointed to by \VAR{ivar} will be compared.} diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index d815b00ba..6d1a4dd8a 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -38,11 +38,14 @@ \begin{apiarguments} -\apiargument{OUT}{ivar}{A remotely accessible integer variable. When using \CorCpp, - the type of \VAR{ivar} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{cmp}{The compare operator that compares \VAR{ivar} with - \VAR{cmp\_value}. When using \Fortran, it must be of default kind. - When using \CorCpp, it must be of type \CTYPE{int}.} +\apiargument{OUT}{ivar}{A remotely accessible integer variable. 
When using + \CorCpp, the type of \VAR{ivar} should match that implied in the SYNOPSIS + section.} +\apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with + \VAR{cmp\_value}. When using \Fortran, it must be of default kind. + When using \CorCpp, it must be of type \CTYPE{int}. \color{ForestGreen}The + comparison operator may be used with a point-to-point hint constants by + combining them with a bitwise OR operation.\color{Black}} \apiargument{IN}{cmp\_value}{\VAR{cmp\_value} must be of type integer. When using \CorCpp, the type of \VAR{cmp\_value} should match that implied in the SYNOPSIS section. When using \Fortran, cmp\_value must be an integer of From a2bd4e52961ebe3f8653868ca27f88d80daffda7 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 6 May 2019 11:01:00 -0500 Subject: [PATCH 195/319] Update put-with-signal proposal The following changes were made to the put-with-signal proposal. 1. Put-with-signal combined into a single group 2. pt2pt sync texts are updated to be more clear --- content/p2p_sync_intro.tex | 16 +++++++++------- content/shmem_test.tex | 4 ++-- content/shmem_wait_until.tex | 10 +++++----- main_spec.tex | 15 +++++++++------ 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 75c51b336..6a26144a0 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -73,18 +73,20 @@ \color{ForestGreen} The point-to-point synchronization interface provides support for passing hints -to specify the type of \openshmem routine that is expected to update a variable -on the local \ac{PE} on which the point-to-point synchronization operation is -performed. The hints are named constants whose values are integer constant -expressions. The hint names and associated \openshmem routines that are expected -to update the variable on the local \ac{PE} are presented in Table~\ref{p2p-hints}. 
+to specify the \openshmem routine that is expected to update the remotely +accessible data object on a local \ac{PE} on which the synchronization operation +is performed. The hints are named constants whose values are integer constant +expressions. The hint names and associated \openshmem routines that is expected +to update the remotely accessible data object on the local \ac{PE} are presented +in Table~\ref{p2p-hints}. \begin{table}[h] \begin{center} \begin{tabular}{ll} \hline Hint Name & Associated \openshmem routines \\ \hline - \LibConstRef{SHMEM\_SIGNAL\_UPDATE} & Section~\ref{subsec:shmem_put_signal} - and Section~\ref{subsec:shmem_put_signal_nbi} \\ \hline + \LibConstRef{SHMEM\_SIGNAL\_UPDATE} & Data object used for signalling + through routines from Sections~\ref{subsec:shmem_put_signal} + and~\ref{subsec:shmem_put_signal_nbi} \\ \hline \end{tabular} \TableCaptionRef{Point-to-Point Hint Constants} \label{p2p-hints} diff --git a/content/shmem_test.tex b/content/shmem_test.tex index 40f1f04e0..63b01c005 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -20,8 +20,8 @@ \apiargument{IN}{ivar}{A pointer to a remotely accessible data object.} \apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with - \VAR{cmp\_value}.\color{ForestGreen}The comparison operator may be used with - a point-to-point hint constants by combining them with a bitwise OR + \VAR{cmp\_value} \color{ForestGreen} and the comparison operator may be used + with point-to-point hint constants by combining them with a bitwise OR operation.\color{Black}} \apiargument{IN}{cmp\_value}{The value against which the object pointed to by \VAR{ivar} will be compared.} diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index 341ac2ac2..42f6b436e 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -43,8 +43,8 @@ section.} \apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with \VAR{cmp\_value}.
When using \Fortran, it must be of default kind. - When using \CorCpp, it must be of type \CTYPE{int}. \color{ForestGreen}The - comparison operator may be used with a point-to-point hint constants by + When using \CorCpp, it must be of type \CTYPE{int} \color{ForestGreen} and + the comparison operator may be used with point-to-point hint constants by combining them with a bitwise OR operation.\color{Black}} \apiargument{IN}{cmp\_value}{\VAR{cmp\_value} must be of type integer. When using \CorCpp, the type of \VAR{cmp\_value} should match that implied in the @@ -85,9 +85,9 @@ \color{ForestGreen} \apireturnvalues{ - Returns a single element of type specified in the symopsis. It is the value - contained in the symmetric data object, \VAR{ivar}, at the calling \ac{PE} - that satisfies the wait condition. + When using \CorCpp, returns the contents of the symmetric data object, + \VAR{ivar}, at the calling \ac{PE} that satisfies the wait condition. The + data type of the return value is the same as the type of \VAR{ivar}.
} \color{Black} diff --git a/main_spec.tex b/main_spec.tex index 9ae510220..6b17b65b5 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -170,9 +170,6 @@ \subsubsection{\textbf{SHMEM\_P}}\label{subsec:shmem_p} \subsubsection{\textbf{SHMEM\_IPUT}}\label{subsec:shmem_iput} \input{content/shmem_iput.tex} -\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} -\input{content/shmem_put_signal.tex} - \subsubsection{\textbf{SHMEM\_GET}}\label{subsec:shmem_get} \input{content/shmem_get.tex} @@ -188,13 +185,19 @@ \subsection{Non-blocking Remote Memory Access Routines}\label{sec:rma_nbi} \subsubsection{\textbf{SHMEM\_PUT\_NBI}}\label{subsec:shmem_put_nbi} \input{content/shmem_put_nbi.tex} -\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL\_NBI}}\label{subsec:shmem_put_signal_nbi} -\input{content/shmem_put_signal_nbi.tex} - \subsubsection{\textbf{SHMEM\_GET\_NBI}}\label{subsec:shmem_get_nbi} \input{content/shmem_get_nbi.tex} +\subsection{Signalling Operations}\label{sec:signal} + +\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} +\input{content/shmem_put_signal.tex} + +\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL\_NBI}}\label{subsec:shmem_put_signal_nbi} +\input{content/shmem_put_signal_nbi.tex} + + \subsection{Atomic Memory Operations}\label{sec:amo} \input{content/atomics_intro} From d553d428a3fd13cd518e9484b02b6f9dc81ce55a Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 6 May 2019 11:38:57 -0500 Subject: [PATCH 196/319] Update put-with-signal text with many changes --- content/library_constants.tex | 17 +++++++++-------- content/p2p_sync_intro.tex | 6 +++--- content/shmem_put_signal.tex | 11 ++++++++++- content/shmem_put_signal_nbi.tex | 11 ++++++++++- content/shmem_test.tex | 5 ++--- content/shmem_wait_until.tex | 4 ++-- 6 files changed, 36 insertions(+), 18 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index 05a082d9f..3c085a25c 100644 --- a/content/library_constants.tex +++ 
b/content/library_constants.tex @@ -70,16 +70,16 @@ \color{ForestGreen} \LibConstDecl{SHMEM\_SIGNAL\_SET} & \color{ForestGreen} -An integer constant expression corresponding to the signal update set operation. -See Section~\ref{subsec:shmem_put_signal} and +An integer constant expression corresponding to the signal update operation of +type set. See Section~\ref{subsec:shmem_put_signal} and Section~\ref{subsec:shmem_put_signal_nbi} for more detail about its use. \tabularnewline \hline %% \color{ForestGreen} \LibConstDecl{SHMEM\_SIGNAL\_ADD} & \color{ForestGreen} -An integer constant expression corresponding to the signal update add operation. -See Section~\ref{subsec:shmem_put_signal} and +An integer constant expression corresponding to the signal update operation of +type add. See Section~\ref{subsec:shmem_put_signal} and Section~\ref{subsec:shmem_put_signal_nbi} for more detail about its use. \tabularnewline \hline %% @@ -290,10 +290,11 @@ \LibConstDecl{SHMEM\_SIGNAL\_UPDATE} & \color{ForestGreen} -An integer constant expression corresponding to a hint, specifying an update to -be expected as a signal on a variable on the local \ac{PE} through \openshmem -routines described in Section~\ref{subsec:shmem_put_signal} and -Section~\ref{subsec:shmem_put_signal_nbi}. See Section~\ref{subsec:p2p_intro} +An integer constant expression corresponding to a hint used in point-to-point +synchronization. It specifies that the remotely accessible data object on a +local \ac{PE} used for synchronization is expected to be updated as a signal +through \openshmem routines described in Section~\ref{subsec:shmem_put_signal} +and Section~\ref{subsec:shmem_put_signal_nbi}. See Section~\ref{subsec:p2p_intro} for more detail about its use. 
\tabularnewline \hline %% diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 6a26144a0..30eb9a412 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -76,9 +76,9 @@ to specify the \openshmem routine that is expected to update the remotely accessible data object on a local \ac{PE} on which the synchronization operation is performed. The hints are named constants whose values are integer constant -expressions. The hint names and associated \openshmem routines that is expected -to update the remotely accessible data object on the local \ac{PE} are presented -in Table~\ref{p2p-hints}. +expressions. The hint names and the associated \openshmem routines that are +expected to update the remotely accessible data object on the local \ac{PE} are +presented in Table~\ref{p2p-hints}. \begin{table}[h] \begin{center} \begin{tabular}{ll} diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index f56892d47..f632db851 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -66,7 +66,16 @@ blocking or non-blocking variant of the put-with-signal routine that updates the \VAR{sig\_addr} signal data object using the same \VAR{sig\_op} signal update operator and any point-to-point synchronization routine that accesses - the \VAR{sig\_addr} signal data object. + the \VAR{sig\_addr} signal data object. With the above described atomicity + guarantees, the following options can be used as the \VAR{sig\_op} signal + operator. + + \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to \VAR{sig\_addr} + signal data object is an atomic set operation. It writes the \VAR{signal} + value into \VAR{sig\_addr} on \VAR{PE} as an atomic operation.} + \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_ADD}}{An update to \VAR{sig\_addr} + signal data object is an atomic add operation. 
It adds the \VAR{signal} + value into \VAR{sig\_addr} on \VAR{PE} as an atomic operation.} } \apireturnvalues{ diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 03b0619ce..efd242ef1 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -74,7 +74,16 @@ any other blocking or non-blocking variant of the put-with-signal routine that updates the \VAR{sig\_addr} signal data object using the same \VAR{sig\_op} signal update operator and any point-to-point synchronization - routine that accesses the \VAR{sig\_addr} signal data object. + routine that accesses the \VAR{sig\_addr} signal data object. With the above + described atomicity guarantees, the following options can be used as the + \VAR{sig\_op} signal operator. + + \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to \VAR{sig\_addr} + signal data object is an atomic set operation. It writes the \VAR{signal} + value into \VAR{sig\_addr} on \VAR{PE} as an atomic operation.} + \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_ADD}}{An update to \VAR{sig\_addr} + signal data object is an atomic add operation. 
It adds the \VAR{signal} + value into \VAR{sig\_addr} on \VAR{PE} as an atomic operation.} } \apireturnvalues{ diff --git a/content/shmem_test.tex b/content/shmem_test.tex index 63b01c005..22fb4b4e6 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -20,9 +20,8 @@ \apiargument{IN}{ivar}{A pointer to a remotely accessible data object.} \apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with - \VAR{cmp\_value} \color{ForestGreen} and the comparison operator may be used - with point-to-point hint constants by combining them with a bitwise OR - operation.\color{Black}} + \VAR{cmp\_value} \color{ForestGreen} and it may be used with point-to-point + hint constants by combining them with a bitwise OR operation.\color{Black}} \apiargument{IN}{cmp\_value}{The value against which the object pointed to by \VAR{ivar} will be compared.} diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index 42f6b436e..24b1ce9fa 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -44,8 +44,8 @@ \apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with \VAR{cmp\_value}. When using \Fortran, it must be of default kind. When using \CorCpp, it must be of type \CTYPE{int} \color{ForestGreen} and - the comparison operator may be used with point-to-point hint constants by - combining them with a bitwise OR operation.\color{Black}} + it may be used with point-to-point hint constants by combining them with a + bitwise OR operation.\color{Black}} \apiargument{IN}{cmp\_value}{\VAR{cmp\_value} must be of type integer. When using \CorCpp, the type of \VAR{cmp\_value} should match that implied in the SYNOPSIS section. When using \Fortran, cmp\_value must be an integer of From fb03d298bcfabefede48b9e7f756dfa30da6dde9 Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Mon, 6 May 2019 14:34:17 -0400 Subject: [PATCH 197/319] Add example for shmem_wait_until_any_vector Signed-off-by: David M. 
Ozog --- example_code/shmem_wait_until_any_vector.c | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 example_code/shmem_wait_until_any_vector.c diff --git a/example_code/shmem_wait_until_any_vector.c b/example_code/shmem_wait_until_any_vector.c new file mode 100644 index 000000000..b9811bd1c --- /dev/null +++ b/example_code/shmem_wait_until_any_vector.c @@ -0,0 +1,41 @@ +#include <shmem.h> +#include <stdlib.h> + +#define N 100 + +int main(void) +{ + int total_sum = 0; + + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + + int *ivars = shmem_calloc(npes, sizeof(int)); + int *status = calloc(npes, sizeof(int)); + int *cmp_values = malloc(npes * sizeof(int)); + + /* All odd PEs put 2 and all even PEs put 1 */ + for (int i = 0; i < npes; i++) { + shmem_p(&ivars[mype], mype % 2 + 1, i); + + /* Set cmp_values to the expected values coming from each PE */ + cmp_values[i] = i % 2 + 1; + } + + for (int i = 0; i < npes; i++) { + size_t completed_idx = shmem_wait_until_any_vector(ivars, npes, status, SHMEM_CMP_EQ, cmp_values); + status[completed_idx] = 1; + total_sum += ivars[completed_idx]; + } + + /* check the result */ + int correct_result = npes + npes / 2 + npes % 2; + + if (total_sum != correct_result) { + shmem_global_exit(1); + } + + shmem_finalize(); + return 0; +} From 8eedcfa8ec3df70e7a35251522bd7ce96d7953d0 Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Mon, 6 May 2019 14:36:46 -0400 Subject: [PATCH 198/319] Remove 'respectively' from `cmp` description Signed-off-by: David M.
Ozog --- content/shmem_test_all_vector.tex | 4 ++-- content/shmem_test_any_vector.tex | 4 ++-- content/shmem_test_some_vector.tex | 4 ++-- content/shmem_wait_until_all_vector.tex | 4 ++-- content/shmem_wait_until_any_vector.tex | 4 ++-- content/shmem_wait_until_some_vector.tex | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/content/shmem_test_all_vector.tex b/content/shmem_test_all_vector.tex index 7baa7f0aa..d7cbfb865 100644 --- a/content/shmem_test_all_vector.tex +++ b/content/shmem_test_all_vector.tex @@ -26,8 +26,8 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the test set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - \newtext{respectively} compares elements of \VAR{ivars} with - \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + compares elements of \VAR{ivars} with \newtext{elements of} + \VAR{cmp\_value\newtext{s}}.} \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} containing values to be compared with the respective objects in \VAR{ivars}.}} diff --git a/content/shmem_test_any_vector.tex b/content/shmem_test_any_vector.tex index b46d419cb..dd2f176e1 100644 --- a/content/shmem_test_any_vector.tex +++ b/content/shmem_test_any_vector.tex @@ -26,8 +26,8 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the test set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - \newtext{respectively} compares elements of \VAR{ivars} with - \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + compares elements of \VAR{ivars} with \newtext{elements of} + \VAR{cmp\_value\newtext{s}}.} \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} containing values to be compared with the respective objects in \VAR{ivars}.}} diff --git a/content/shmem_test_some_vector.tex 
b/content/shmem_test_some_vector.tex index ee0ff6279..93d40baa4 100644 --- a/content/shmem_test_some_vector.tex +++ b/content/shmem_test_some_vector.tex @@ -28,8 +28,8 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the test set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - \newtext{respectively} compares elements of \VAR{ivars} with - \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + compares elements of \VAR{ivars} with \newtext{elements of} + \VAR{cmp\_value\newtext{s}}.} \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} containing values to be compared with the respective objects in \VAR{ivars}.}} diff --git a/content/shmem_wait_until_all_vector.tex b/content/shmem_wait_until_all_vector.tex index 2567f1e5a..3abc36d3e 100644 --- a/content/shmem_wait_until_all_vector.tex +++ b/content/shmem_wait_until_all_vector.tex @@ -25,8 +25,8 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the wait set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - \newtext{respectively} compares elements of \VAR{ivars} with - \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + compares elements of \VAR{ivars} with \newtext{elements of} + \VAR{cmp\_value\newtext{s}}.} \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} containing values to be compared with the respective objects in \VAR{ivars}.}} diff --git a/content/shmem_wait_until_any_vector.tex b/content/shmem_wait_until_any_vector.tex index a738e8358..80ceb95d5 100644 --- a/content/shmem_wait_until_any_vector.tex +++ b/content/shmem_wait_until_any_vector.tex @@ -26,8 +26,8 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the wait set.} \apiargument{IN}{cmp}{A 
comparison operator from Table~\ref{p2p-consts} that - \newtext{respectively} compares elements of \VAR{ivars} with - \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + compares elements of \VAR{ivars} with \newtext{elements of} + \VAR{cmp\_value\newtext{s}}.} \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} containing values to be compared with the respective objects in \VAR{ivars}.}} diff --git a/content/shmem_wait_until_some_vector.tex b/content/shmem_wait_until_some_vector.tex index 07796be28..da0d86617 100644 --- a/content/shmem_wait_until_some_vector.tex +++ b/content/shmem_wait_until_some_vector.tex @@ -28,8 +28,8 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the wait set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - \newtext{respectively} compares elements of \VAR{ivars} with - \newtext{elements of} \VAR{cmp\_value\newtext{s}}.} + compares elements of \VAR{ivars} with \newtext{elements of} + \VAR{cmp\_value\newtext{s}}.} \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} containing values to be compared with the respective objects in \VAR{ivars}.}} From cbaca236fc6b0c160c616b679850f03c5edc6b4c Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Mon, 6 May 2019 16:09:58 -0400 Subject: [PATCH 199/319] Make wait/test API consistent wrt cmp/cmp_value(s) Signed-off-by: David M. 
Ozog --- content/shmem_test_all.tex | 8 +++--- content/shmem_test_all_vector.tex | 10 +++---- content/shmem_test_any.tex | 9 ++++--- content/shmem_test_any_vector.tex | 14 +++++----- content/shmem_test_some.tex | 9 ++++--- content/shmem_test_some_vector.tex | 21 ++++++++------- content/shmem_wait_until_all.tex | 3 +++ content/shmem_wait_until_all_vector.tex | 22 +++++++-------- content/shmem_wait_until_any.tex | 6 ++++- content/shmem_wait_until_any_vector.tex | 31 ++++++++++++++-------- content/shmem_wait_until_some.tex | 6 ++++- content/shmem_wait_until_some_vector.tex | 10 +++---- example_code/shmem_wait_until_any_vector.c | 3 ++- 13 files changed, 88 insertions(+), 64 deletions(-) diff --git a/content/shmem_test_all.tex b/content/shmem_test_all.tex index 601cf4f3b..a7ed78c22 100644 --- a/content/shmem_test_all.tex +++ b/content/shmem_test_all.tex @@ -35,10 +35,10 @@ The \FUNC{shmem\_test\_all} routine indicates whether all entries in the test set specified by \VAR{ivars} and \VAR{status} have satisfied the test condition at the calling \ac{PE}. This routine does not block and returns zero if - not all entries in \VAR{ivars} satisfied the test condition. This routine - compares each of the \VAR{nelems} elements in the \VAR{ivars} array with - the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} - at the calling \ac{PE}. + not all entries in \VAR{ivars} satisfied the test condition. + \newtext{This routine compares each element of the \VAR{ivars} array in the + test set with the value \VAR{cmp\_value} according to the comparison + operator \VAR{cmp} at the calling \ac{PE}}. If \VAR{nelems} is 0, the test set is empty and this routine returns 1. 
The optional \VAR{status} is a mask array of length \VAR{nelems} where each element diff --git a/content/shmem_test_all_vector.tex b/content/shmem_test_all_vector.tex index d7cbfb865..58036bb5e 100644 --- a/content/shmem_test_all_vector.tex +++ b/content/shmem_test_all_vector.tex @@ -38,11 +38,11 @@ entries in the test set specified by \VAR{ivars} and \VAR{status} have satisfied the test condition at the calling \ac{PE}. This routine does not block and returns zero if not all entries in \VAR{ivars} satisfied the test - condition\newtext{s}. This routine \newtext{respectively} compares each of - the \VAR{nelems} elements in the \VAR{ivars} array with \oldtext{the} - \newtext{each} value \newtext{in} \VAR{cmp\_value\newtext{s}} according to - the comparison operator \VAR{cmp} at the calling \ac{PE}. If \VAR{nelems} - is 0, the test set is empty and this routine returns 1. + condition\newtext{s}. \newtext{This routine compares each element of the + \VAR{ivars} array in the test set with each respective value in + \VAR{cmp\_values} according to the comparison operator \VAR{cmp} at the + calling \ac{PE}}. If \VAR{nelems} is 0, the test set is empty and this + routine returns 1. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates whether diff --git a/content/shmem_test_any.tex b/content/shmem_test_any.tex index bd2462242..0d2938d72 100644 --- a/content/shmem_test_any.tex +++ b/content/shmem_test_any.tex @@ -36,10 +36,11 @@ The \FUNC{shmem\_test\_any} routine indicates whether any entry in the test set specified by \VAR{ivars} and \VAR{status} has satisfied the test condition at the calling \ac{PE}. This routine does not block and returns \CONST{SIZE\_MAX} if - no entries in \VAR{ivars} satisfied the test condition. 
This routine - compares each of the \VAR{nelems} elements in the \VAR{ivars} array with - the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} - at the calling \ac{PE}. The order in which these elements are tested is + no entries in \VAR{ivars} satisfied the test condition. + \newtext{This routine compares each element of the \VAR{ivars} array in the + test set with the value \VAR{cmp\_value} according to the comparison + operator \VAR{cmp} at the calling \ac{PE}}. + The order in which these elements are tested is unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies the test condition, a series of calls to \FUNC{shmem\_test\_any} must eventually return $i$. diff --git a/content/shmem_test_any_vector.tex b/content/shmem_test_any_vector.tex index dd2f176e1..6528ac83a 100644 --- a/content/shmem_test_any_vector.tex +++ b/content/shmem_test_any_vector.tex @@ -38,13 +38,13 @@ entry in the test set specified by \VAR{ivars} and \VAR{status} has satisfied the test condition at the calling \ac{PE}. This routine does not block and returns \CONST{SIZE\_MAX} if no entries in \VAR{ivars} satisfied - the test condition. This routine \newtext{respectively} compares each of - the \VAR{nelems} elements in the \VAR{ivars} array with \oldtext{the} - \newtext{each} value \newtext{in} \VAR{cmp\_value\newtext{s}} according to - the comparison operator \VAR{cmp} at the calling \ac{PE}. The order in - which these elements are tested is unspecified. If an entry $i$ in - \VAR{ivars} within the test set satisfies the test condition, a series of - calls to \FUNC{shmem\_test\_any\newtext{\_vector}} must eventually return $i$. + the test condition. \newtext{This routine compares each element of the + \VAR{ivars} array in the test set with each respective value in + \VAR{cmp\_values} according to the comparison operator \VAR{cmp} at the + calling \ac{PE}}. The order in which these elements are tested is + unspecified. 
If an entry $i$ in \VAR{ivars} within the test set satisfies + the test condition, a series of calls to + \FUNC{shmem\_test\_any\newtext{\_vector}} must eventually return $i$. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates diff --git a/content/shmem_test_some.tex b/content/shmem_test_some.tex index e619e293a..ed32ec729 100644 --- a/content/shmem_test_some.tex +++ b/content/shmem_test_some.tex @@ -38,10 +38,11 @@ The \FUNC{shmem\_test\_some} routine indicates whether at least one entry in the test set specified by \VAR{ivars} and \VAR{status} satisfies the test condition at the calling \ac{PE}. This routine does not block and returns zero if - no entries in \VAR{ivars} satisfied the test condition. This routine - compares each element of the \VAR{ivars} array in the test set with the - value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} at - the calling \ac{PE}. This routine tests all elements of \VAR{ivars} in the + no entries in \VAR{ivars} satisfied the test condition. + \newtext{This routine compares each element of the \VAR{ivars} array in the + test set with the value \VAR{cmp\_value} according to the comparison + operator \VAR{cmp} at the calling \ac{PE}}. + This routine tests all elements of \VAR{ivars} in the test set at least once, and the order in which the elements are tested is unspecified. 
If an entry $i$ in \VAR{ivars} within the test set satisfies the test condition, a series of calls to \FUNC{shmem\_test\_some} must diff --git a/content/shmem_test_some_vector.tex b/content/shmem_test_some_vector.tex index 93d40baa4..4eb866899 100644 --- a/content/shmem_test_some_vector.tex +++ b/content/shmem_test_some_vector.tex @@ -36,16 +36,17 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_test\_some\newtext{\_vector}} routine indicates whether at least one entry - in the test set specified by \VAR{ivars} and \VAR{status} satisfies the - test condition at the calling \ac{PE}. This routine does not block and returns zero if - no entries in \VAR{ivars} satisfied the test condition. This routine - \newtext{respectively} compares each element of the \VAR{ivars} array in the test set with \oldtext{the} \newtext{each} - value \newtext{in} \VAR{cmp\_value\newtext{s}} according to the comparison operator \VAR{cmp} at - the calling \ac{PE}. This routine tests all elements of \VAR{ivars} in the - test set at least once, and the order in which the elements are tested is - unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies - the test condition, a series of calls to \FUNC{shmem\_test\_some\newtext{\_vector}} must + The \FUNC{shmem\_test\_some\newtext{\_vector}} routine indicates whether at + least one entry in the test set specified by \VAR{ivars} and \VAR{status} + satisfies the test condition at the calling \ac{PE}. This routine does not + block and returns zero if no entries in \VAR{ivars} satisfied the test + condition. \newtext{This routine compares each element of the \VAR{ivars} + array in the test set with each respective value in \VAR{cmp\_values} + according to the comparison operator \VAR{cmp} at the calling \ac{PE}}. + This routine tests all elements of \VAR{ivars} in the test set at least + once, and the order in which the elements are tested is unspecified. 
If an + entry $i$ in \VAR{ivars} within the test set satisfies the test condition, + a series of calls to \FUNC{shmem\_test\_some\newtext{\_vector}} must eventually return $i$. Upon return, the \VAR{indices} array contains the indices of the elements diff --git a/content/shmem_wait_until_all.tex b/content/shmem_wait_until_all.tex index 8b947e099..c4dd1b1b7 100644 --- a/content/shmem_wait_until_all.tex +++ b/content/shmem_wait_until_all.tex @@ -35,6 +35,9 @@ The \FUNC{shmem\_wait\_until\_all} routine waits until all entries in the wait set specified by \VAR{ivars} and \VAR{status} have satisfied the wait condition at the calling \ac{PE}. If \VAR{nelems} is 0, the wait set is empty and this routine returns immediately. + \newtext{This routine compares each element of the \VAR{ivars} array in the + wait set with the value \VAR{cmp\_value} according to the comparison + operator \VAR{cmp} at the calling \ac{PE}}. This routine is semantically similar to \FUNC{shmem\_wait\_until} in Section~\ref{subsec:shmem_wait_until}, but adds support for point-to-point synchronization involving an array of diff --git a/content/shmem_wait_until_all_vector.tex b/content/shmem_wait_until_all_vector.tex index 3abc36d3e..6e5116a86 100644 --- a/content/shmem_wait_until_all_vector.tex +++ b/content/shmem_wait_until_all_vector.tex @@ -35,14 +35,14 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_all\_vector} routine waits until all entries in the wait set specified by \VAR{ivars} and \VAR{status} have satisfied - the wait condition\newtext{s} at the calling \ac{PE}. \newtext{This - routine behaves very similarly to \FUNC{shmem\_wait\_until\_all}, but - supports multiple conditional values in the \VAR{cmp\_values} array.} If - \VAR{nelems} is 0, the wait set is empty and this routine returns - immediately. 
\oldtext{This routine is semantically similar to - \FUNC{shmem\_wait\_until} in Section~\ref{subsec:shmem_wait_until}, but - adds support for point-to-point synchronization involving an array of - symmetric data objects.} + the wait condition\newtext{s} at the calling \ac{PE}. If \VAR{nelems} is + 0, the wait set is empty and this routine returns immediately. + \newtext{This routine compares each element of the \VAR{ivars} array in the + wait set with each respective value in \VAR{cmp\_values} according to the + comparison operator \VAR{cmp} at the calling \ac{PE}}. \oldtext{This + routine is semantically similar to \FUNC{shmem\_wait\_until} in + Section~\ref{subsec:shmem_wait_until}, but adds support for point-to-point + synchronization involving an array of symmetric data objects.} The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates @@ -50,9 +50,9 @@ \VAR{status} set to 0 will be included in the wait set, and elements set to 1 will be ignored. If all elements in \VAR{status} are set to 1 or \VAR{nelems} is 0, the wait set is empty and this routine returns - immediately. If \VAR{status} is a null pointer, it is ignored and - all elements in \VAR{ivars} are included in the wait set. The \VAR{ivars} - and \VAR{status} arrays must not overlap in memory. + immediately. If \VAR{status} is a null pointer, it is ignored and all + elements in \VAR{ivars} are included in the wait set. The \VAR{ivars} and + \VAR{status} arrays must not overlap in memory. } diff --git a/content/shmem_wait_until_any.tex b/content/shmem_wait_until_any.tex index 349c1ae75..634c396d6 100644 --- a/content/shmem_wait_until_any.tex +++ b/content/shmem_wait_until_any.tex @@ -35,7 +35,11 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_any} routine waits until any one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait - condition at the calling \ac{PE}. 
The order in which these elements are + condition at the calling \ac{PE}. + \newtext{This routine compares each element of the \VAR{ivars} array in the + wait set with the value \VAR{cmp\_value} according to the comparison + operator \VAR{cmp} at the calling \ac{PE}}. + The order in which these elements are waited upon is unspecified. If an entry $i$ in \VAR{ivars} within the wait set satisfies the wait condition, a series of calls to \FUNC{shmem\_wait\_until\_any} must eventually return $i$. diff --git a/content/shmem_wait_until_any_vector.tex b/content/shmem_wait_until_any_vector.tex index 80ceb95d5..c35c0ac27 100644 --- a/content/shmem_wait_until_any_vector.tex +++ b/content/shmem_wait_until_any_vector.tex @@ -36,13 +36,13 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_any\_vector} routine waits until any one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies - the wait condition at the calling \ac{PE}. \newtext{This - routine behaves very similarly to \FUNC{shmem\_wait\_until\_any}, but - supports multiple conditional values in the \VAR{cmp\_values} array.} The - order in which these elements are waited upon is unspecified. If an entry - $i$ in \VAR{ivars} within the wait set satisfies the wait condition, a - series of calls to \FUNC{shmem\_wait\_until\_any\_vector} must eventually - return $i$. + the wait condition at the calling \ac{PE}. \newtext{This routine compares + each element of the \VAR{ivars} array in the wait set with each respective + value in \VAR{cmp\_values} according to the comparison operator \VAR{cmp} + at the calling \ac{PE}}. The order in which these elements are waited upon + is unspecified. If an entry $i$ in \VAR{ivars} within the wait set + satisfies the wait condition, a series of calls to + \FUNC{shmem\_wait\_until\_any\_vector} must eventually return $i$. 
The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates @@ -50,10 +50,9 @@ \VAR{status} set to 0 will be included in the wait set, and elements set to 1 will be ignored. If all elements in \VAR{status} are set to 1 or \VAR{nelems} is 0, the wait set is empty and this routine returns - \CONST{SIZE\_MAX}. If - \VAR{status} is a null pointer, it is ignored and all elements in - \VAR{ivars} are included in the wait set. The \VAR{ivars} and \VAR{status} - arrays must not overlap in memory. + \CONST{SIZE\_MAX}. If \VAR{status} is a null pointer, it is ignored and + all elements in \VAR{ivars} are included in the wait set. The \VAR{ivars} + and \VAR{status} arrays must not overlap in memory. } \apireturnvalues{ @@ -73,6 +72,16 @@ \FUNC{shmem\_wait\_until\_any\newtext{\_vector}} to return. } +\color{ForestGreen}{ +\begin{apiexamples} + \apicexample + {The following \Cstd[11] example demonstrates the use of + \FUNC{shmem\_wait\_until\_any\_vector} to wait on values that differ + between even PEs and odd PEs.} + {./example_code/shmem_wait_until_any_vector.c} + {} +\end{apiexamples} +} \end{apidefinition} diff --git a/content/shmem_wait_until_some.tex b/content/shmem_wait_until_some.tex index 2729f09ba..834a2355a 100644 --- a/content/shmem_wait_until_some.tex +++ b/content/shmem_wait_until_some.tex @@ -37,7 +37,11 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_some} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the - wait condition at the calling \ac{PE}. This routine tests all elements of + wait condition at the calling \ac{PE}. + \newtext{This routine compares each element of the \VAR{ivars} array in the + wait set with the value \VAR{cmp\_value} according to the comparison + operator \VAR{cmp} at the calling \ac{PE}}. 
+ This routine tests all elements of \VAR{ivars} in the wait set at least once, and the order in which the elements are waited upon is unspecified. diff --git a/content/shmem_wait_until_some_vector.tex b/content/shmem_wait_until_some_vector.tex index da0d86617..f92ca082a 100644 --- a/content/shmem_wait_until_some_vector.tex +++ b/content/shmem_wait_until_some_vector.tex @@ -39,11 +39,11 @@ The \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait condition at the calling \ac{PE}. - \newtext{This routine behaves very similarly to - \FUNC{shmem\_wait\_until\_some}, but supports multiple conditional values in - the \VAR{cmp\_values} array.} This routine tests all elements of - \VAR{ivars} in the wait set at least once, and the order in which the - elements are waited upon is unspecified. + \newtext{This routine compares each element of the \VAR{ivars} array in the + wait set with each respective value in \VAR{cmp\_values} according to the + comparison operator \VAR{cmp} at the calling \ac{PE}}. This routine tests + all elements of \VAR{ivars} in the wait set at least once, and the order in + which the elements are waited upon is unspecified. 
Upon return, the \VAR{indices} array contains the indices of at least one element in the wait set that satisfied the wait condition during the call diff --git a/example_code/shmem_wait_until_any_vector.c b/example_code/shmem_wait_until_any_vector.c index b9811bd1c..0977ed66c 100644 --- a/example_code/shmem_wait_until_any_vector.c +++ b/example_code/shmem_wait_until_any_vector.c @@ -24,7 +24,8 @@ int main(void) } for (int i = 0; i < npes; i++) { - size_t completed_idx = shmem_wait_until_any_vector(ivars, npes, status, SHMEM_CMP_EQ, cmp_values); + size_t completed_idx = shmem_wait_until_any_vector(ivars, npes, status, + SHMEM_CMP_EQ, cmp_values); status[completed_idx] = 1; total_sum += ivars[completed_idx]; } From 2aa3605518d68b7fb487cb584163a9379a3cdb4e Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Mon, 6 May 2019 16:19:25 -0400 Subject: [PATCH 200/319] Add "Wait and Test" to programming model summary Signed-off-by: David M. Ozog --- content/programming_model_overview.tex | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 47e456a08..ba017cb9c 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -108,6 +108,11 @@ \item \OPR{Barrier}: All or some \acp{PE} collectively synchronize and ensure completion of all remote and local updates prior to any \ac{PE} returning from the call. + \item \OPR{Wait and Test}: A PE calling a point-to-point synchronization + routine ensures the value of a local symmetric object meets a specified + condition. Wait operations block until the specified condition is + met, whereas test operations return immediately and indicate whether or + not the specified condition is met. 
\end{enumerate} \item \textbf{Collective Communication} From eeac4f40a473424f4dc0e1dd67df3bcf30f1a4c1 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 6 May 2019 17:10:15 -0400 Subject: [PATCH 201/319] Revert "Updated from March 28 WG discussion" This reverts commit 4cebc5bece12bf9ff96c4abad889438de4c2c073. --- content/shmem_ptr.tex | 14 +++++--------- content/shmem_test.tex | 4 ++-- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index 889d987c8..81732013b 100644 --- a/content/shmem_ptr.tex +++ b/content/shmem_ptr.tex @@ -16,8 +16,7 @@ \begin{apiarguments} -\apiargument{IN}{dest}{The symmetric address of the remotely accessible data - object to be referenced.} +\apiargument{IN}{dest}{The symmetric data object to be referenced.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to be accessed. When using \Fortran, it must be a default integer value.} @@ -27,10 +26,8 @@ \FUNC{shmem\_ptr} returns an address that may be used to directly reference \dest{} on the specified \ac{PE}. This address can be assigned to a pointer. After that, ordinary loads and stores to this remote address may be performed. - The address returned by \FUNC{shmem\_ptr} is a local address to a remotely - accessible data object. Providing this address to argument of an - \openshmem routine that requires a symmetric address to a remotely - accessible object results in undefined behavior. + The address returned by \FUNC{shmem\_ptr} is considered to be locally + accessible and is not valid where a remotely accessible address is required. The \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish communication, for example when a sequence of reads and writes to a data @@ -40,9 +37,8 @@ } \apireturnvalues{ - A local pointer to the remotely accessible \dest{} data object is returned - when it can be accessed using memory loads and stores. Otherwise, a null - pointer is returned. 
+ The address of the \dest{} data object is returned when it is accessible + using memory loads and stores. Otherwise, a null pointer is returned. } \apinotes{ diff --git a/content/shmem_test.tex b/content/shmem_test.tex index d9de7b3d5..fca8eed10 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -33,9 +33,9 @@ calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. - Implementations must ensure that \FUNC{shmem\_test} does not return 1 before + Implementations must ensure that \FUNC{shmem\_test} does not return true before the update of the memory indicated by \VAR{ivar} is fully complete. - Partial updates to the memory must not cause \FUNC{shmem\_test} to return 1. + Partial updates to the memory must not cause \FUNC{shmem\_test} to return true. } \apireturnvalues{ From ec15003f9abbbcac7dde08aa64e1b6e118e3a77c Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 6 May 2019 17:11:16 -0400 Subject: [PATCH 202/319] Revert "Clarify that shmem_ptr returns a locally accessible address" This reverts commit 70338e3d6846a2bfc03b65779b80d1b6aef5da3b. --- content/shmem_ptr.tex | 2 -- 1 file changed, 2 deletions(-) diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index 81732013b..98d8c592a 100644 --- a/content/shmem_ptr.tex +++ b/content/shmem_ptr.tex @@ -26,8 +26,6 @@ \FUNC{shmem\_ptr} returns an address that may be used to directly reference \dest{} on the specified \ac{PE}. This address can be assigned to a pointer. After that, ordinary loads and stores to this remote address may be performed. - The address returned by \FUNC{shmem\_ptr} is considered to be locally - accessible and is not valid where a remotely accessible address is required. 
The \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish communication, for example when a sequence of reads and writes to a data From 3e9410ff3254659e75182ac363e688214e348a2e Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 6 May 2019 17:13:23 -0400 Subject: [PATCH 203/319] Remove partial updates text Signed-off-by: James Dinan --- content/shmem_test.tex | 3 +-- content/shmem_wait_until.tex | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/content/shmem_test.tex b/content/shmem_test.tex index fca8eed10..5a6cc86c7 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -33,9 +33,8 @@ calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. - Implementations must ensure that \FUNC{shmem\_test} does not return true before + Implementations must ensure that \FUNC{shmem\_test} does not return 1 before the update of the memory indicated by \VAR{ivar} is fully complete. - Partial updates to the memory must not cause \FUNC{shmem\_test} to return true. } \apireturnvalues{ diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index 90daf5a0c..8be5cbfb3 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -67,8 +67,7 @@ Implementations must ensure that \FUNC{shmem\_wait} and \FUNC{shmem\_wait\_until} do not return before the update of the memory - indicated by \VAR{ivar} is fully complete. Partial updates to the memory - must not cause \FUNC{shmem\_wait} or \FUNC{shmem\_wait\_until} to return. + indicated by \VAR{ivar} is fully complete. } From 909f4e6922830576808d9170f342fe3495f7bd6d Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Tue, 14 May 2019 14:29:37 -0400 Subject: [PATCH 204/319] Fix comparison bug in shmem_sync example w/ teams Signed-off-by: David M. 
Ozog --- example_code/shmem_sync_example.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example_code/shmem_sync_example.c b/example_code/shmem_sync_example.c index e6fb53e3e..00992a2d4 100644 --- a/example_code/shmem_sync_example.c +++ b/example_code/shmem_sync_example.c @@ -28,14 +28,14 @@ int main(void) int my_pe_twos = shmem_team_my_pe(twos_team); int my_pe_threes = shmem_team_my_pe(threes_team); - if (my_pe_twos != SHMEM_TEAM_NULL) { + if (twos_team != SHMEM_TEAM_NULL) { /* put the value 2 to the next team member in a circular fashion */ shmem_p(&x, 2, (me + 2) % npes); shmem_quiet(); shmem_sync(twos_team); } - if (my_pe_threes != SHMEM_TEAM_NULL) { + if (threes_team != SHMEM_TEAM_NULL) { /* put the value 3 to the next team member in a circular fashion */ shmem_p(&x, 3, (me + 3) % npes); shmem_quiet(); From 3c770744066a954cee1b39e381dccc68c68bfc66 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Sat, 1 Jun 2019 09:03:17 -0400 Subject: [PATCH 205/319] Rename shmem_teams_intro.tex -> teams_intro.tex --- content/{shmem_teams_intro.tex => teams_intro.tex} | 0 main_spec.tex | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename content/{shmem_teams_intro.tex => teams_intro.tex} (100%) diff --git a/content/shmem_teams_intro.tex b/content/teams_intro.tex similarity index 100% rename from content/shmem_teams_intro.tex rename to content/teams_intro.tex diff --git a/main_spec.tex b/main_spec.tex index 1839c1a6b..be89b01bb 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -122,7 +122,7 @@ \subsubsection{\textbf{SHPDEALLC}}\label{subsec:shpdeallc} \color{Green} \subsection{Team Management Routines}\label{subsec:team} -\input{content/shmem_teams_intro.tex} +\input{content/teams_intro.tex} \subsubsection{\textbf{SHMEM\_TEAM\_MY\_PE}}\label{subsec:shmem_team_my_pe} \input{content/shmem_team_my_pe.tex} From dc571559786201d22fa42dea0dbee4d73a64e3ef Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Sat, 1 Jun 2019 09:06:14 -0400 Subject: [PATCH 206/319] Rename SHMEM_TEAM_NULL -> SHMEM_TEAM_INVALID --- content/collective_intro.tex | 2 +- content/library_constants.tex | 2 +- content/shmem_alltoall.tex | 2 +- content/shmem_broadcast.tex | 6 +++--- content/shmem_collect.tex | 2 +- content/shmem_reductions.tex | 4 ++-- content/shmem_sync.tex | 4 ++-- content/shmem_team_destroy.tex | 2 +- content/shmem_team_get_config.tex | 2 +- content/shmem_team_my_pe.tex | 2 +- content/shmem_team_n_pes.tex | 2 +- content/shmem_team_split_2d.tex | 6 +++--- content/shmem_team_split_strided.tex | 8 ++++---- content/teams_intro.tex | 2 +- example_code/shmem_sync_example.c | 4 ++-- example_code/shmem_team_context.c | 4 ++-- example_code/shmem_team_split_strided.c | 2 +- example_code/shmem_team_translate.c | 2 +- 18 files changed, 29 insertions(+), 29 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index a8a0c4c0a..89aff6ddd 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -23,7 +23,7 @@ \openshmem team, which is specified by a team handle argument. Team-based collective operations require all \acp{PE} in the team to call the routine in order for the operation to complete. If an invalid team handle -or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to a team-based collective +or \LibConstRef{SHMEM\_TEAM\_INVALID} is passed to a team-based collective routine, the behavior is undefined. 
Team objects encapsulate the per \ac{PE} system resources required to complete diff --git a/content/library_constants.tex b/content/library_constants.tex index 7e857964b..f8ac961cb 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -50,7 +50,7 @@ \tabularnewline \hline %% \color{Green} -\LibConstDecl{SHMEM\_TEAM\_NULL} & +\LibConstDecl{SHMEM\_TEAM\_INVALID} & \color{Green} Predefined constant that can be compared against handles of type \CTYPE{shmem\_team\_t} to determine if they refer to a valid team. diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 812e3dd7e..f6cd5c576 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -169,7 +169,7 @@ See section \ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. For portable error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL}. + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID}. } This routine restores \VAR{pSync} to its original contents. Multiple calls diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 7e18ae6f9..d73171003 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -95,7 +95,7 @@ Team-based broadcast routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in the operation. - If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to this routine, + If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_INVALID} is passed to this routine, the behavior is undefined. As with all team-based \openshmem routines, \ac{PE} @@ -164,7 +164,7 @@ See section \ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. 
For portable error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL} + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} } All \openshmem broadcast routines restore \VAR{pSync} to its original contents. @@ -186,7 +186,7 @@ Team handle error checking and integer return codes are currently undefined. Implementations may define these behaviors as needed, but programs should ensure portability by doing their own checks for invalid team handles and for - \LibConstRef{SHMEM\_TEAM\_NULL}. + \LibConstRef{SHMEM\_TEAM\_INVALID}. } \begin{apiexamples} diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 276a097f9..209099fe0 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -163,7 +163,7 @@ See section \ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. For portable error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL}. + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID}. } All \openshmem collective routines reset the values in \VAR{pSync} before they diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 081750f71..3c5b294cd 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -345,7 +345,7 @@ {\color{Green} Team-based reduction routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in the reduction. If an invalid team handle - or \LibConstRef{SHMEM\_TEAM\_NULL} is passed to this routine, the behavior is undefined. + or \LibConstRef{SHMEM\_TEAM\_INVALID} is passed to this routine, the behavior is undefined. Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. 
@@ -440,7 +440,7 @@ See section \ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. For portable error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL} + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} } All \openshmem reduction routines reset the values in \VAR{pSync} before they diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 66c81c006..633b44ef5 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -57,7 +57,7 @@ Team-based sync routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in the sync operation. - If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_NULL} + If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_INVALID} is passed to this routine, the behavior is undefined. Active-set-based sync routines operate over all \acp{PE} in the active set @@ -93,7 +93,7 @@ See section \ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. For portable error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_NULL} + for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} } If the \VAR{pSync} array is initialized at run time, another method of diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 7177bb63b..9c12d1402 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -28,7 +28,7 @@ and a nonzero value is returned. After returning from the routine, if the team was successfully destroyed, -the handle will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. +the handle will be assigned the value \LibConstRef{SHMEM\_TEAM\_INVALID}. 
Team destruction assumes that any resources explicitly created from the team, such as contexts created from the team, have already been released through diff --git a/content/shmem_team_get_config.tex b/content/shmem_team_get_config.tex index d031ffef0..0e9c60a84 100644 --- a/content/shmem_team_get_config.tex +++ b/content/shmem_team_get_config.tex @@ -20,7 +20,7 @@ to input configuration parameters when the team was created. If the \VAR{team} argument does not specify a valid team, the behavior is -undefined. If \VAR{team} is equal to \LibConstRef{SHMEM\_TEAM\_NULL}, then config will be set to the null pointer. +undefined. If \VAR{team} is equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then config will be set to the null pointer. } \apireturnvalues{ diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index 43d41aa5a..af6c3293d 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -24,7 +24,7 @@ \apireturnvalues{ The number of the calling \ac{PE} within the provided team, or the value -\CONST{-1} if the provided team compares equal to \LibConstRef{SHMEM\_TEAM\_NULL}. +\CONST{-1} if the provided team compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}. } \apinotes{ diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index f4dc45755..260726733 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -26,7 +26,7 @@ \apireturnvalues{ Total number of \acp{PE} in the provided team, or the value -\CONST{-1} if the provided team compares equal to \LibConstRef{SHMEM\_TEAM\_NULL}. +\CONST{-1} if the provided team compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}. } \apinotes{ diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index c5e0d39ac..bfed1b43d 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -92,12 +92,12 @@ If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. 
-If \VAR{parent\_team} compares equal to \LibConstRef{SHMEM\_TEAM\_NULL}, no new +If \VAR{parent\_team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, no new teams will be created, and both \VAR{xaxis\_team} and \VAR{yaxis\_team} -will be assigned the value \LibConstRef{SHMEM\_TEAM\_NULL}. +will be assigned the value \LibConstRef{SHMEM\_TEAM\_INVALID}. If either team cannot be created, that team will be assigned the value -\LibConstRef{SHMEM\_TEAM\_NULL}. +\LibConstRef{SHMEM\_TEAM\_INVALID}. } \apireturnvalues{ diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 7fff6e532..c2eda7e6d 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -46,7 +46,7 @@ This routine must be called by all processes contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the triplet specification, but for those processes a \VAR{new\_team} value of -\LibConstRef{SHMEM\_TEAM\_NULL} is returned. All calling processes must provide the +\LibConstRef{SHMEM\_TEAM\_INVALID} is returned. All calling processes must provide the same values for the \ac{PE} triplet. This routine will return a \VAR{new\_team} containing the \ac{PE} subset specified by the triplet, and ordered by the existing global \ac{PE} number. None of the parameters need to reside in @@ -64,14 +64,14 @@ If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. -If \VAR{parent\_team} compares equal to \LibConstRef{SHMEM\_TEAM\_NULL}, then no +If \VAR{parent\_team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then no new team will be created, and \VAR{new\_team} will be assigned the value -\LibConstRef{SHMEM\_TEAM\_NULL}. +\LibConstRef{SHMEM\_TEAM\_INVALID}. If an invalid \ac{PE} triplet is provided, then the \VAR{new\_team} will not be created. If \VAR{new\_team} cannot be created, then it will be assigned the value -\LibConstRef{SHMEM\_TEAM\_NULL}. 
+\LibConstRef{SHMEM\_TEAM\_INVALID}. } \apireturnvalues{ diff --git a/content/teams_intro.tex b/content/teams_intro.tex index 5bd40b41f..158a45235 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -26,7 +26,7 @@ \subsubsection*{Team Handles and Predefined Teams} semantics only. That is, team handles should not be stored in shared variables and used across other \acp{PE}. Doing so will result in undefined behavior. -A special team handle value, \LibConstRef{SHMEM\_TEAM\_NULL}, may be used to +A special team handle value, \LibConstRef{SHMEM\_TEAM\_INVALID}, may be used to indicate that a returned team handle is not valid. This value can be tested against to check for successful split operations and can be assigned to user declared team handles as a sentinel value. diff --git a/example_code/shmem_sync_example.c b/example_code/shmem_sync_example.c index e6fb53e3e..f5d40fed9 100644 --- a/example_code/shmem_sync_example.c +++ b/example_code/shmem_sync_example.c @@ -28,14 +28,14 @@ int main(void) int my_pe_twos = shmem_team_my_pe(twos_team); int my_pe_threes = shmem_team_my_pe(threes_team); - if (my_pe_twos != SHMEM_TEAM_NULL) { + if (my_pe_twos != SHMEM_TEAM_INVALID) { /* put the value 2 to the next team member in a circular fashion */ shmem_p(&x, 2, (me + 2) % npes); shmem_quiet(); shmem_sync(twos_team); } - if (my_pe_threes != SHMEM_TEAM_NULL) { + if (my_pe_threes != SHMEM_TEAM_INVALID) { /* put the value 3 to the next team member in a circular fashion */ shmem_p(&x, 3, (me + 3) % npes); shmem_quiet(); diff --git a/example_code/shmem_team_context.c b/example_code/shmem_team_context.c index dc41ae1ce..343417fec 100644 --- a/example_code/shmem_team_context.c +++ b/example_code/shmem_team_context.c @@ -19,7 +19,7 @@ int my_ctx_translate_pe(shmem_ctx_t src_ctx, int src_pe, shmem_ctx_t dest_ctx) } shmem_ctx_t my_team_create_ctx(shmem_team_t team) { - if (team == SHMEM_TEAM_NULL) { + if (team == SHMEM_TEAM_INVALID) { return SHMEM_CTX_INVALID; } @@ -83,7 
+83,7 @@ int main() shmem_team_sync(SHMEM_TEAM_WORLD); // We will add up some results on pe 4 of team_3s using ctx_2s - if ((team_3s != SHMEM_TEAM_NULL) && (team_2s != SHMEM_TEAM_NULL)) { + if ((team_3s != SHMEM_TEAM_INVALID) && (team_2s != SHMEM_TEAM_INVALID)) { int _pe4_of_3s_in_2s = my_ctx_translate_pe(ctx_3s, 4, ctx_2s); if (_pe4_of_3s_in_2s < 0) { diff --git a/example_code/shmem_team_split_strided.c b/example_code/shmem_team_split_strided.c index dfb0c8137..42973ba57 100644 --- a/example_code/shmem_team_split_strided.c +++ b/example_code/shmem_team_split_strided.c @@ -21,7 +21,7 @@ int main(int argc, char *argv[]) shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, &new_team); - if (new_team != SHMEM_TEAM_NULL) { + if (new_team != SHMEM_TEAM_INVALID) { t_size = shmem_team_n_pes(new_team); t_pe = shmem_team_my_pe(new_team); diff --git a/example_code/shmem_team_translate.c b/example_code/shmem_team_translate.c index d1fb796fd..b1ae74b1a 100644 --- a/example_code/shmem_team_translate.c +++ b/example_code/shmem_team_translate.c @@ -22,7 +22,7 @@ int main(int argc, char *argv[]) shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, &new_team); - if (new_team != SHMEM_TEAM_NULL) { + if (new_team != SHMEM_TEAM_INVALID) { t_pe = shmem_team_my_pe(new_team); t_global = shmem_team_translate(new_team, t_pe, SHMEM_TEAM_WORLD); From 3e2a38bd42895549a30a0bc76a6ec1b2cd4ac4f4 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Sat, 1 Jun 2019 09:07:28 -0400 Subject: [PATCH 207/319] Clarify definition of SHMEM_TEAM_INVALID --- content/library_constants.tex | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index f8ac961cb..5a5850e10 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -52,8 +52,11 @@ \color{Green} \LibConstDecl{SHMEM\_TEAM\_INVALID} & \color{Green} -Predefined constant that can be compared against handles of type -\CTYPE{shmem\_team\_t} to determine if they refer to a valid team. +A value corresponding to an invalid team. +This value can be used to initialize or update team handles to indicate +that they do not reference a valid team. +When managed in this way, applications can use an equality comparison +to test whether a given team handle references a valid team. See Section~\ref{subsec:team} for more detail about its use. \tabularnewline \hline %% From 2a4102f2e36e2ddbf0ed47e2e2f9e0be1adf8f0a Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Sat, 1 Jun 2019 09:27:26 -0400 Subject: [PATCH 208/319] Emphasize *collective* synchronizations w.r.t. team --- content/library_handles.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/library_handles.tex b/content/library_handles.tex index e7c94a647..0899e61e5 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -18,7 +18,7 @@ \color{Green} Handle of type \CTYPE{shmem\_team\_t} that corresponds to the default team of all \acp{PE} in the \openshmem program. All point-to-point -communication operations and synchronizations that do not specify a team +communication operations and collective synchronizations that do not specify a team are performed on the default team. See Section~\ref{subsec:team} for more detail about its use. \tabularnewline \hline From 4dcc9e54d443d7cc2e12e7bf01697e69abee1028 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Sat, 1 Jun 2019 10:28:28 -0400 Subject: [PATCH 209/319] Partial revision of the teams intro --- content/teams_intro.tex | 119 ++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/content/teams_intro.tex b/content/teams_intro.tex index 158a45235..be736b8fd 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -1,65 +1,66 @@ The \acp{PE} in an \openshmem program communicate using either -point-to-point routines that specify the \ac{PE} number of the target -\ac{PE} or collective routines that operate over some predefined -set of \acp{PE}. Teams in \openshmem allow programs to group subsets -of \acp{PE} for communications. Collective communications operate on -teams objects across the \acp{PE} in the team. Point-to-point routines -can make use of team based renumbering of \acp{PE} by utilizing team -based contexts or \ac{PE} number translation. - -An \openshmem team is a set of \acp{PE} defined by calling a specific team -split routine with a parent team argument and other arguments to -specify how the parent team is to be split into one or more new teams. -Any team created by a \FUNC{shmem\_team\_split\_*} routine can subsequently -be used as the parent team for further calls to team split routines. -A team persists and can be used for team-based routine calls -until it is destroyed by \FUNC{shmem\_team\_destroy}. - -Every team must have a least one member. Any attempt to create a team over an -empty set of \acp{PE} will result in no new team being created. - -\subsubsection*{Team Handles and Predefined Teams} - -A ``team handle'' is an opaque object with type \CTYPE{shmem\_team\_t} that is used -to reference a defined team. Team handles are created by one of the team split -routines and destroyed by the team destroy routine. Team handles have local -semantics only. That is, team handles should not be stored in shared variables -and used across other \acp{PE}. 
Doing so will result in undefined behavior. - -A special team handle value, \LibConstRef{SHMEM\_TEAM\_INVALID}, may be used to -indicate that a returned team handle is not valid. This value can be tested -against to check for successful split operations and can be assigned to user -declared team handles as a sentinel value. - -By default, \openshmem creates predefined teams that will be available -for use once the routine \FUNC{shmem\_init} has been called. See -Section~\ref{subsec:library_handles} for a description of all predefined team handles -provided by \openshmem. Predefined \CTYPE{shmem\_team\_t} handles can be used as -the parent team when creating new \openshmem teams. - -Every \ac{PE} is a member of the default team, which may be referenced -through the team handle \LibHandleRef{SHMEM\_TEAM\_WORLD}. -The \ac{PE} number in the default team is equal to the -value of its \ac{PE} number as returned by \FUNC{shmem\_my\_pe}. - -\subsubsection*{Team Objects and Multithreading Within a \ac{PE}} - -Team handles are passed as arguments to a variety of \openshmem routines, -including collective routines (see Section~\ref{subsec:coll}), include team -creation routines. While \openshmem routines are thread-safe as -per threading model (see section \ref{subsec:thread_support}),\openshmem -teams objects are not themselves thread-safe. It is the responsibility -of the application to ensure that there are no simultaneous collective -routines operating on the same \openshmem team on a given \ac{PE}. - -\subsubsection*{Team Objects and Collective Ordering across \acp{PE}} - -In \openshmem, a team object encapsulates resources uses to communicate +point-to-point routines, which specify the \ac{PE} number of the target +\ac{PE}, or collective routines, which operate over a set of \acp{PE}. +In \openshmem, teams allow programs to group a set of \acp{PE} for +communication. +Team-based collective communications operate across all the \acp{PE} +in a valid team. 
+Point-to-point communication can make use of team-relative \ac{PE} +numbering through team-based contexts (see Section~\ref{sec:ctx}) or +\ac{PE} number translation. + +\subsubsection*{Predefined and Program-Defined Teams} + +An \openshmem team may be predefined (i.e., provided by the \openshmem +library) or defined by the \openshmem program. +A program-defined team is created by ``splitting'' a parent team into +one or more new teams---each with some subset of \acp{PE} of the +parent team---via one of the \FUNC{shmem\_team\_split\_*} routines. + +All predefined teams are valid for the duration of the \openshmem +portion of an application. +Any team successfully created by a \FUNC{shmem\_team\_split\_*} +routine is valid until it is destroyed. +All valid teams have at least one member. + +\subsubsection*{Team Handles} + +A ``team handle'' is an opaque object with type \CTYPE{shmem\_team\_t} +that is used to reference a team. +Team handles are not remotely accessible objects. +The predefined teams may be accessed via the team handles listed in +Section~\ref{subsec:library_handles}. + +\openshmem communication routines that do not accept a team handle +argument operate on the default team, which may be accessed through +the \LibHandleRef{SHMEM\_TEAM\_WORLD} handle. +The default team encompasses the set of all \acp{PE} in the \openshmem +program, and a \ac{PE} number in the default team is the same as the +value returned by \FUNC{shmem\_my\_pe}. + +A team handle may be initialized to or assigned the value +\CONST{SHMEM\_TEAM\_INVALID} to indicate that the handle does not +reference a valid team. +When managed in this way, applications can use an equality comparison +to test whether a given team handle references a valid team.
+ +\subsubsection*{Thread Safety} + +When it is allowed by the threading model provided by the \openshmem +library, a team may be used concurrently in non-collective operations +(e.g., \FUNC{shmem\_team\_my\_pe}) by multiple threads within the +\ac{PE} where it was created. +For collective operations, a team may not be used concurrently by +multiple threads in the same \ac{PE}. + +\subsubsection*{Collective Ordering} + +In \openshmem, a team object encapsulates resources used to communicate between \acp{PE} in collective operations. When calling multiple subsequent -collective operations on a team, the collective operations -- along with any -relevant team based resources -- are matched across the \acp{PE} in the team +collective operations on a team, the collective operations---along with any +relevant team-based resources---are matched across the \acp{PE} in the team based on ordering of collective routine calls. It is the responsibility -of the application to ensure a consistent ordering of collective routine calls +of the \openshmem program to ensure the same ordering of collective routine calls across all \acp{PE} in a team. There is no need for explicit synchronization between subsequent calls From 53d998782060614b43b4f2f37fae73100a1035b7 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Wed, 5 Jun 2019 18:22:34 -0500 Subject: [PATCH 210/319] RM P2P changes from put-with-sig proposal --- content/library_constants.tex | 12 ------------ content/p2p_sync_intro.tex | 23 ----------------------- content/shmem_test.tex | 3 +-- content/shmem_wait_until.tex | 14 ++++---------- 4 files changed, 5 insertions(+), 47 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index 3c085a25c..dfbcba344 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -286,16 +286,4 @@ See Section~\ref{subsec:p2p_intro} for more detail about its use.
\tabularnewline \hline %% -\color{ForestGreen} -\LibConstDecl{SHMEM\_SIGNAL\_UPDATE} -& -\color{ForestGreen} -An integer constant expression corresponding to a hint used in point-to-point -synchronization. It specifies that the remotely accessible data object on a -local \ac{PE} used for synchronization is expected to be updated as a signal -through \openshmem routines described in Section~\ref{subsec:shmem_put_signal} -and Section~\ref{subsec:shmem_put_signal_nbi}. See Section~\ref{subsec:p2p_intro} -for more detail about its use. -\tabularnewline \hline -%% \end{longtable} diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 30eb9a412..172300c62 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -70,26 +70,3 @@ \label{p2p-consts} \end{center} \end{table} - -\color{ForestGreen} -The point-to-point synchronization interface provides support for passing hints -to specify the \openshmem routine that is expected to update the remotely -accessible data object on a local \ac{PE} on which the synchronization operation -is performed. The hints are named constants whose values are integer constant -expressions. The hint names and the associated \openshmem routines that are -expected to update the remotely accessible data object on the local \ac{PE} are -presented in Table~\ref{p2p-hints}. 
-\begin{table}[h] - \begin{center} - \begin{tabular}{ll} - \hline - Hint Name & Associated \openshmem routines \\ \hline - \LibConstRef{SHMEM\_SIGNAL\_UPDATE} & Data object used for signalling - through routines from Sections~\ref{subsec:shmem_put_signal} and - ~\ref{subsec:shmem_put_signal_nbi} \\ \hline - \end{tabular} - \TableCaptionRef{Point-to-Point Hint Constants} - \label{p2p-hints} - \end{center} -\end{table} -\color{Black} diff --git a/content/shmem_test.tex b/content/shmem_test.tex index 22fb4b4e6..ac739fea2 100644 --- a/content/shmem_test.tex +++ b/content/shmem_test.tex @@ -20,8 +20,7 @@ \apiargument{IN}{ivar}{A pointer to a remotely accessible data object.} \apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with - \VAR{cmp\_value} \color{ForestGreen} and it may be used with point-to-point - hint constants by combining them with a bitwise OR operation.\color{Black}} + \VAR{cmp\_value}.} \apiargument{IN}{cmp\_value}{The value against which the object pointed to by \VAR{ivar} will be compared.} diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index 24b1ce9fa..9b536cb64 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -5,13 +5,13 @@ \begin{apidefinition} \begin{C11synopsis} -TYPE @\FuncDecl{shmem\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); +void @\FuncDecl{shmem\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); \end{C11synopsis} where \TYPE{} is one of the point-to-point synchronization types specified by Table \ref{p2psynctypes}. \begin{Csynopsis} -TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); \end{Csynopsis} where \TYPE{} is one of the point-to-point synchronization types and has a corresponding \TYPENAME{} specified by Table~\ref{p2psynctypes}. 
@@ -43,9 +43,7 @@ section.} \apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with \VAR{cmp\_value}. When using \Fortran, it must be of default kind. - When using \CorCpp, it must be of type \CTYPE{int} \color{ForestGreen} and - it may be used with point-to-point hint constants by combining them with a - bitwise OR operation.\color{Black}} + When using \CorCpp, it must be of type \CTYPE{int}.} \apiargument{IN}{cmp\_value}{\VAR{cmp\_value} must be of type integer. When using \CorCpp, the type of \VAR{cmp\_value} should match that implied in the SYNOPSIS section. When using \Fortran, cmp\_value must be an integer of @@ -83,13 +81,9 @@ \apitablerow{shmem\_int4\_wait, shmem\_int4\_wait\_until}{INTEGER*4} \apitablerow{shmem\_int8\_wait, shmem\_int8\_wait\_until}{INTEGER*8} -\color{ForestGreen} \apireturnvalues{ - When using \CorCpp, returns the contents of the symmetric data object, - \VAR{ivar}, at the calling \ac{PE} that satisfies the wait condition. The - data type of the return value is same as the type of \VAR{ivar}. + None } -\color{Black} \apinotes{ As of \openshmem[1.4], the \FUNC{shmem\_wait} routine is deprecated; From e7a379333adaa575345cd9e1a9b77c9d14424675 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Wed, 5 Jun 2019 19:04:32 -0500 Subject: [PATCH 211/319] Add support for shmem_signal_wait_until --- content/shmem_signal_wait_until.tex | 55 +++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 content/shmem_signal_wait_until.tex diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex new file mode 100644 index 000000000..cd0064c43 --- /dev/null +++ b/content/shmem_signal_wait_until.tex @@ -0,0 +1,55 @@ +\color{ForestGreen} +\apisummary{ + Wait for a variable on the local \ac{PE} to change from a signaling operation. 
+} + +\begin{apidefinition} + +\begin{Csynopsis} +uint64_t @\FuncDecl{shmem\_signal\_wait\_until}@(uint64_t *sig_addr, int cmp, uint64_t cmp_value); +\end{Csynopsis} + +\begin{apiarguments} + +\apiargument{OUT}{sig\_addr}{A pointer to a remotely accessible variable.} +\apiargument{IN}{cmp}{The comparison operator that compares \VAR{sig\_addr} with + \VAR{cmp\_value}.} +\apiargument{IN}{cmp\_value}{The value against which the object pointed to + by \VAR{sig\_addr} will be compared.} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_signal\_wait\_until} operation blocks until the value contained + in the symmetric data object, \VAR{sig\_addr}, at the calling \ac{PE} + satisfies the wait condition. In an \openshmem program with single-threaded + or multithreaded \acp{PE}, the \VAR{sig\_addr} object at the calling \ac{PE} + is expected only to be updated as a signal, through the signalling operations + available in Section~\ref{subsec:shmem_signal}. + + This routine can be used to implement point-to-point synchronization between + \acp{PE} or between threads within the same \ac{PE}. A call to this routine + blocks until the value of \VAR{sig\_addr} at the calling \ac{PE} satisfies + the wait condition specified by the comparison operator, \VAR{cmp}, and + comparison value, \VAR{cmp\_value}. +} + +\apireturnvalues{ + Return the contents of the symmetric data object, \VAR{sig\_addr}, at the + calling \ac{PE} that satisfies the wait condition. +} + + +\apinotes{ + None. +} + +\apiimpnotes{ + Implementations must ensure that \FUNC{shmem\_signal\_wait\_until} does not + return before the update of the memory indicated by \VAR{sig\_addr} is fully + complete. Partial updates to the memory must not cause + \FUNC{shmem\_signal\_wait\_until} to return. +} + +\end{apidefinition} +\color{Black} From be43a4bbaed5373d2ea662801900e23c6553b84f Mon Sep 17 00:00:00 2001 From: "Nicholas S.
Park" Date: Fri, 7 Jun 2019 16:50:02 -0400 Subject: [PATCH 212/319] Revise shmem_team_{my_pe, n_pes} --- content/shmem_team_my_pe.tex | 28 ++++++++++++++++------------ content/shmem_team_n_pes.tex | 30 ++++++++++++++++-------------- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/content/shmem_team_my_pe.tex b/content/shmem_team_my_pe.tex index af6c3293d..74a216e7a 100644 --- a/content/shmem_team_my_pe.tex +++ b/content/shmem_team_my_pe.tex @@ -1,5 +1,5 @@ \apisummary{ - Returns the number of the calling \ac{PE} within the provided team. + Returns the number of the calling \ac{PE} within a specified team. } \begin{apidefinition} @@ -9,26 +9,30 @@ \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team}{A valid \openshmem team handle.} +\apiargument{IN}{team}{An \openshmem team handle.} \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_my\_pe} routine returns the number of calling \ac{PE} within the -provided team. The number will be a value between 0 and N-1, -for a team of size N. Each member of the team has a unique number. -For the team \LibHandleRef{SHMEM\_TEAM\_WORLD}, this will return the same value -as \FUNC{shmem\_my\_pe}. - -If the provided team handle is invalid, the behavior is undefined. + When \VAR{team} specifies a valid team, the + \FUNC{shmem\_team\_my\_pe} routine returns the number of the calling + \ac{PE} within the specified team. + The number is an integer between $0$ and $N-1$ for a team of size $N$. + Each member of the team has a unique number. + + When \VAR{team} specifies an invalid team, if \VAR{team} compares + equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then the value + \CONST{-1} is returned; otherwise, the behavior is undefined. } \apireturnvalues{ -The number of the calling \ac{PE} within the provided team, or the value -\CONST{-1} if the provided team compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}. 
+ The number of the calling \ac{PE} within the specified team, or the + value \CONST{-1} if the team handle compares equal to + \LibConstRef{SHMEM\_TEAM\_INVALID}. } \apinotes{ -None. + For the default team, this routine will return the same value as + \FUNC{shmem\_my\_pe}. } \end{apidefinition} diff --git a/content/shmem_team_n_pes.tex b/content/shmem_team_n_pes.tex index 260726733..20a99edf9 100644 --- a/content/shmem_team_n_pes.tex +++ b/content/shmem_team_n_pes.tex @@ -1,5 +1,5 @@ \apisummary{ - Returns the total number of \acp{PE} in the provided team. + Returns the number of \acp{PE} in a specified team. } \begin{apidefinition} @@ -9,28 +9,30 @@ \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{team}{A valid \openshmem team handle.} +\apiargument{IN}{team}{An \openshmem team handle.} \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_n\_pes} routine returns the number of \acp{PE} in the -team. This will always be a value between 1 and N, where N is the total number of -\acp{PE} accessible to the \openshmem program. For the team -\LibHandleRef{SHMEM\_TEAM\_WORLD}, this routine will return the same value as -\FUNC{shmem\_n\_pes}. - -All \acp{PE} in the team will get back the same value for the team size. - -If the provided team handle is invalid, the behavior is undefined. + When \VAR{team} specifies a valid team, the + \FUNC{shmem\_team\_n\_pes} routine returns the number of \acp{PE} in + the team. + This will always be a value between $1$ and $N$, where $N$ is the + total number of \acp{PE} running in the \openshmem program. + + When \VAR{team} specifies an invalid team, if \VAR{team} compares + equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then the value + \CONST{-1} is returned; otherwise, the behavior is undefined. } \apireturnvalues{ -Total number of \acp{PE} in the provided team, or the value -\CONST{-1} if the provided team compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}. 
+ The number of \acp{PE} in the specified team, or the value + \CONST{-1} if the team handle compares equal to + \LibConstRef{SHMEM\_TEAM\_INVALID}. } \apinotes{ -None. + For the default team, this routine will return the same value as + \FUNC{shmem\_n\_pes}. } \end{apidefinition} From 286aa84e11e8a31fe123a58a9b38223453b0b325 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Fri, 7 Jun 2019 16:56:31 -0400 Subject: [PATCH 213/319] Formatting, whitespace, and indentation cleanup for shmem_team_config_t --- content/shmem_team_config_t.tex | 83 +++++++++++++++++---------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex index d8dcddd59..720f9e278 100644 --- a/content/shmem_team_config_t.tex +++ b/content/shmem_team_config_t.tex @@ -10,59 +10,62 @@ } shmem_team_config_t; \end{Csynopsis} - \vspace{1.0em} +\begin{apiarguments} + None. +\end{apiarguments} - \apidescription{ - A team configuration argument acts as an input - \FUNC{shmem\_team\_split\_*} routines. - It specifies the requested capabilities of the team to be - created. - The \VAR{num\_contexts} member specifies the total number of simultaneously - existing contexts that the program requests to create from this team. - These contexts may be created in any number of threads. Successful - creation of a team configured with \VAR{num\_contexts} of $N$ means - that the implementation will make a best effort to reserve enough - resources to allow the team to have $N$ contexts created from the team - in existance at any given time. It is not a guaruntee that $N$ - calls to \FUNC{shmem\_team\_create\_ctx} will succeed. - See Section~\ref{sec:ctx} for more on communication contexts and - Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. +\apidescription{ + A team configuration argument acts as an input to + \FUNC{shmem\_team\_split\_*} routines.
+ It specifies the requested capabilities of the team to be + created. - When using the configuration structure to create teams, a mask parameter - controls which fields to use. - Any configuration parameter value that is not indicated in the mask will be - ignored, and the default value will be used instead. - Therefore, a program does not have to set all fields in the config struct; - only those for which it does not want the default values. + The \VAR{num\_contexts} member specifies the total number of simultaneously + existing contexts that the program requests to create from this team. + These contexts may be created in any number of threads. Successful + creation of a team configured with \VAR{num\_contexts} of $N$ means + that the implementation will make a best effort to reserve enough + resources to allow the team to have $N$ contexts created from the team + in existence at any given time. It is not a guarantee that $N$ + calls to \FUNC{shmem\_team\_create\_ctx} will succeed. + See Section~\ref{sec:ctx} for more on communication contexts and + Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. + + When using the configuration structure to create teams, a mask parameter + controls which fields to use. + Any configuration parameter value that is not indicated in the mask will be + ignored, and the default value will be used instead. + Therefore, a program does not have to set all fields in the config struct; + only those for which it does not want the default values.
+ + A configuration mask value is created by combining individual field + masks through a bitwise OR operation of the following library constants: - A configuration mask value is created by combining individual field - masks with through a bitwise OR operation of the following library constants: - { - \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_CONTEXTS}}{ - The team should be created using the value of the - \VAR{num\_contexts} member of the configuration parameter - \VAR{config} as a requirement. - } + \apitablerow{\LibConstRef{SHMEM\_TEAM\_NUM\_CONTEXTS}}{ + The team should be created using the value of the + \VAR{num\_contexts} member of the configuration parameter + \VAR{config} as a requirement. + } } - A configuration mask value of \CONST{0} indicates that the team - should be created with the default values for all configuration - parameters. + A configuration mask value of \CONST{0} indicates that the team + should be created with the default values for all configuration + parameters. - The default values for configuration parameters are: + The default values for configuration parameters are: { - \apitablerow{num\_contexts = \CONST{0}}{ - By default, no contexts can be created on a new team + \apitablerow{num\_contexts = \CONST{0}}{ + By default, no contexts can be created on a new team. } } - } +} - \apinotes{ - None. - } +\apinotes{ + None. +} \end{apidefinition} From a866e474e8ba9e53d5fa50bb8a21e1764a120435 Mon Sep 17 00:00:00 2001 From: "Nicholas S.
Park" Date: Fri, 7 Jun 2019 17:37:09 -0400 Subject: [PATCH 214/319] Rename shmem_team_translate_pe; minor text changes --- ...team_translate.tex => shmem_team_translate_pe.tex} | 11 ++++++----- main_spec.tex | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) rename content/{shmem_team_translate.tex => shmem_team_translate_pe.tex} (82%) diff --git a/content/shmem_team_translate.tex b/content/shmem_team_translate_pe.tex similarity index 82% rename from content/shmem_team_translate.tex rename to content/shmem_team_translate_pe.tex index 7841629c2..1cf8d1dea 100644 --- a/content/shmem_team_translate.tex +++ b/content/shmem_team_translate_pe.tex @@ -1,5 +1,6 @@ \apisummary{ - Translates a given \ac{PE} number to the corresponding \ac{PE} number in another team. + Translate a given \ac{PE} number from one team to the corresponding + \ac{PE} number in another team. } \begin{apidefinition} @@ -22,9 +23,6 @@ \ac{PE}'s number in \VAR{dest\_team}. If \VAR{src\_pe} is not a member of both the \VAR{src\_team} and \VAR{dest\_team}, a value of \CONST{-1} is returned. -If \LibHandleRef{SHMEM\_TEAM\_WORLD} is provided as the \VAR{dest\_team} parameter, -this routine acts as a global \ac{PE} number translator and will return the corresponding -\LibHandleRef{SHMEM\_TEAM\_WORLD} number. If either of the \VAR{src\_team} or \VAR{dest\_team} handle is invalid, the behavior is undefined. } @@ -35,7 +33,10 @@ } \apinotes{ -None. + If \LibHandleRef{SHMEM\_TEAM\_WORLD} is provided as the + \VAR{dest\_team} parameter, this routine acts as a global \ac{PE} + number translator and will return the corresponding + \LibHandleRef{SHMEM\_TEAM\_WORLD} number. 
} \begin{apiexamples} diff --git a/main_spec.tex b/main_spec.tex index be89b01bb..87fc5fbb8 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -137,8 +137,8 @@ \subsubsection{\textbf{SHMEM\_TEAM\_CONFIG\_T}} \subsubsection{\textbf{SHMEM\_TEAM\_GET\_CONFIG}}\label{subsec:shmem_team_get_config} \input{content/shmem_team_get_config.tex} -\subsubsection{\textbf{SHMEM\_TEAM\_TRANSLATE}}\label{subsec:shmem_team_translate} -\input{content/shmem_team_translate.tex} +\subsubsection{\textbf{SHMEM\_TEAM\_TRANSLATE\_PE}}\label{subsec:shmem_team_translate_pe} +\input{content/shmem_team_translate_pe.tex} \subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_STRIDED}}\label{subsec:shmem_team_split_strided} \input{content/shmem_team_split_strided.tex} From 65faf2b3098055cb5cf91cb5cbd99cb3c25312d0 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Fri, 7 Jun 2019 17:48:22 -0400 Subject: [PATCH 215/319] Minor revisions to shmem_team_destroy --- content/shmem_team_destroy.tex | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 9c12d1402..5c2c11b69 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -1,34 +1,30 @@ \apisummary{ - Destroys existing team. + Destroy an existing team. } \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_team\_destroy}@(shmem_team_t *team); +void @\FuncDecl{shmem\_team\_destroy}@(shmem_team_t team); \end{Csynopsis} \begin{apiarguments} -\apiargument{INOUT}{team}{A pointer to a valid \openshmem team handle.} +\apiargument{IN}{team}{An \openshmem team handle.} \end{apiarguments} \apidescription{ The \FUNC{shmem\_team\_destroy} routine destroys an existing team. This is a collective call, in which every member of the team being destroyed needs -to participate. This will free all internal memory structures associated -with the team and invalidate the team handle. Upon return, the team +to participate. 
This will free all internal resources associated +with the team. Upon return, the team handle can no longer be used for team API calls. It is considered erroneous to free \LibHandleRef{SHMEM\_TEAM\_WORLD} or any other predefined team. -If a pointer to an invalid handle is provided, the behavior is undefined. - -If the pointer to \VAR{team} is a null pointer, then no team is destroyed, -and a nonzero value is returned. - -After returning from the routine, if the team was successfully destroyed, -the handle will be assigned the value \LibConstRef{SHMEM\_TEAM\_INVALID}. +When \VAR{team} specifies an invalid team, if \VAR{team} compares +equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then no operation is +performed; otherwise, the behavior is undefined. Team destruction assumes that any resources explicitly created from the team, such as contexts created from the team, have already been released through @@ -39,11 +35,11 @@ } \apireturnvalues{ -Zero upon successful destruction of the team, nonzero otherwise. + None. } \apinotes{ -None. + None. } \end{apidefinition} From 41fa466115620f6b8ee234de170476ec455c211d Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Fri, 7 Jun 2019 18:03:34 -0400 Subject: [PATCH 216/319] Un-deprecate shmem_barrier_all --- content/shmem_barrier_all.tex | 8 -------- 1 file changed, 8 deletions(-) diff --git a/content/shmem_barrier_all.tex b/content/shmem_barrier_all.tex index 548152d44..8392054bf 100644 --- a/content/shmem_barrier_all.tex +++ b/content/shmem_barrier_all.tex @@ -1,4 +1,3 @@ -\begin{DeprecateBlock} \apisummary{ Registers the arrival of a \ac{PE} at a barrier and blocks the \ac{PE} until all other \acp{PE} arrive at the barrier and all local @@ -34,12 +33,6 @@ \ac{RMA} routine calls such as \FUNC{shmem\_int\_add}, \FUNC{shmem\_put32}, \FUNC{shmem\_put\_nbi}, and \FUNC{shmem\_get\_nbi}. 
- -{\color{Green} - \FUNC{shmem\_barrier\_all} has been deprecated in favor of the equivalent - call to \FUNC{shmem\_quiet} followed by a call to - \FUNC{shmem\_sync(SHMEM\_TEAM\_WORLD)}. -} } \apireturnvalues{ @@ -66,4 +59,3 @@ \end{apiexamples} \end{apidefinition} -\end{DeprecateBlock} From ee7c597c92f615ffcba5d04dfbb6e5148a5ee4f7 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Fri, 7 Jun 2019 18:04:17 -0400 Subject: [PATCH 217/319] Add clarifying text to shmem_barrier_all --- content/shmem_barrier_all.tex | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/content/shmem_barrier_all.tex b/content/shmem_barrier_all.tex index 8392054bf..32ac8d0c6 100644 --- a/content/shmem_barrier_all.tex +++ b/content/shmem_barrier_all.tex @@ -21,11 +21,12 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_barrier\_all} routine registers the arrival of a \ac{PE} at - a barrier. Barriers are a mechanism for synchronizing all \acp{PE} at - once. This routine blocks the \ac{PE} until all \acp{PE} have called + The \FUNC{shmem\_barrier\_all} routine \oldtext{registers the arrival of a \ac{PE} at + a barrier. Barriers are} \newtext{is} a mechanism for synchronizing all \acp{PE} \newtext{in the default team} at + once. This routine blocks the \newtext{calling} \ac{PE} until all \acp{PE} have called \FUNC{shmem\_barrier\_all}. In a multithreaded \openshmem - program, only the calling thread is blocked. + program, only the calling thread is blocked\newtext{; however, + the routine may not be called concurrently by multiple threads in the same \ac{PE}}. Prior to synchronizing with other \acp{PE}, \FUNC{shmem\_barrier\_all} ensures completion of all previously issued memory stores and remote memory @@ -40,9 +41,17 @@ } \apinotes{ + \newtext{% + The \FUNC{shmem\_barrier\_all} routine is equivalent to calling + \FUNC{shmem\_ctx\_quiet} on the default context followed by + calling \FUNC{shmem\_team\_sync} on the default team.
+ } + + \oldtext{% The \FUNC{shmem\_barrier\_all} routine can be used to portably ensure that memory access operations observe remote updates in the order enforced by initiator \acp{PE}. + } Calls to \FUNC{shmem\_ctx\_quiet} can be performed prior to calling the barrier routine to ensure completion of operations issued on From 021e587ab59212d5995605c577ac0468b3a35bd0 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 12 Jun 2019 17:38:31 -0400 Subject: [PATCH 218/319] Revise shmem_team_destroy to destroy its shareable contexts --- content/shmem_team_destroy.tex | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/content/shmem_team_destroy.tex b/content/shmem_team_destroy.tex index 5c2c11b69..00df39af8 100644 --- a/content/shmem_team_destroy.tex +++ b/content/shmem_team_destroy.tex @@ -13,25 +13,22 @@ \end{apiarguments} \apidescription{ -The \FUNC{shmem\_team\_destroy} routine destroys an existing team. This is a -collective call, in which every member of the team being destroyed needs -to participate. This will free all internal resources associated -with the team. Upon return, the team -handle can no longer be used for team API calls. -It is considered erroneous to free \LibHandleRef{SHMEM\_TEAM\_WORLD} or -any other predefined team. +The \FUNC{shmem\_team\_destroy} routine is a collective operation that +destroys the team referenced by the team handle argument \VAR{team}. +Upon return, the referenced team is invalid. + +This routine destroys all shareable contexts created from the +referenced team. The user is responsible for destroying all contexts +created from this team with the \CONST{SHMEM\_CTX\_PRIVATE} option +enabled prior to calling this routine; otherwise, the behavior is +undefined. + +It is an error to free the default team or any other predefined team. 
When \VAR{team} specifies an invalid team, if \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then no operation is performed; otherwise, the behavior is undefined. - -Team destruction assumes that any resources explicitly created from the team, -such as contexts created from the team, have already been released through -the appropriate function, such as destroying the context. If there are any -objects or resources explicitly created from the team that have not been -explicitly released before \FUNC{shmem\_team\_destroy} is called, behavior is -undefined. } \apireturnvalues{ From d10d690627ea90af90fbec296cf940fdafe08e21 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 12 Jun 2019 17:40:50 -0400 Subject: [PATCH 219/319] Revise shmem_finalize to destroy all teams --- content/shmem_finalize.tex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/content/shmem_finalize.tex b/content/shmem_finalize.tex index a65348fa2..789dc9319 100644 --- a/content/shmem_finalize.tex +++ b/content/shmem_finalize.tex @@ -26,7 +26,11 @@ implicit global barrier in \FUNC{shmem\_finalize} to ensure that pending communications are completed and that no resources are released until all \acp{PE} have entered \FUNC{shmem\_finalize}. - This routine destroys all shareable contexts. The user is + \oldtext{This routine destroys all shareable contexts.} + \newtext{ + This routine destroys all teams created by the \openshmem program. + As a result, all shareable contexts are destroyed. + } The user is responsible for destroying all contexts with the \CONST{SHMEM\_CTX\_PRIVATE} option enabled prior to calling this routine; otherwise, the behavior is undefined. From b2f22000c574c0081ed3f08191dab7570a25387a Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 12 Jun 2019 18:01:10 -0400 Subject: [PATCH 220/319] Add missing text highlighting in shmem_ctx_destroy --- content/shmem_ctx_destroy.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_ctx_destroy.tex b/content/shmem_ctx_destroy.tex index 2cedf6d6b..c0607d0a4 100644 --- a/content/shmem_ctx_destroy.tex +++ b/content/shmem_ctx_destroy.tex @@ -14,7 +14,7 @@ \apidescription{ \FUNC{shmem\_ctx\_destroy} destroys a context that was created by a call to - \FUNC{shmem\_ctx\_create} or \FUNC{shmem\_team\_create\_ctx}. + \FUNC{shmem\_ctx\_create} \newtext{or \FUNC{shmem\_team\_create\_ctx}}. It is the user's responsibility to ensure that the context is not used after it has been destroyed, for example when the destroyed context is used by multiple threads. This function From 03ea9a03f6eab73130330fe6f69df141ddaad233 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Thu, 13 Jun 2019 08:05:26 -0400 Subject: [PATCH 221/319] Make shmem_team_translate_pe example compilable --- content/shmem_team_translate_pe.tex | 2 +- example_code/shmem_team_translate.c | 36 -------------------------- example_code/shmem_team_translate_pe.c | 32 +++++++++++++++++++++++ 3 files changed, 33 insertions(+), 37 deletions(-) delete mode 100644 example_code/shmem_team_translate.c create mode 100644 example_code/shmem_team_translate_pe.c diff --git a/content/shmem_team_translate_pe.tex b/content/shmem_team_translate_pe.tex index 1cf8d1dea..d3e8c772a 100644 --- a/content/shmem_team_translate_pe.tex +++ b/content/shmem_team_translate_pe.tex @@ -47,7 +47,7 @@ of the even number \acp{PE} in the default team. 
Then, all \acp{PE} in the new team acquire their \ac{PE} number in the new team and translate it to the \ac{PE} number in the default team.} - {./example_code/shmem_team_translate.c} + {./example_code/shmem_team_translate_pe.c} {} \end{apiexamples} diff --git a/example_code/shmem_team_translate.c b/example_code/shmem_team_translate.c deleted file mode 100644 index b1ae74b1a..000000000 --- a/example_code/shmem_team_translate.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * OpenSHMEM shmem_team_translate example to verify the team formed by even - * ranked PEs from SHMEM_TEAM_WORLD using the team created from - * shmem_team_split_stride operation - */ - -#include -#include - -int main(int argc, char *argv[]) -{ - int rank; - int t_pe; - int t_global; - shmem_team_t new_team; - shmem_team_config_t *config; - - shmem_init(); - config = NULL; - rank = shmem_my_pe(); - - shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, - &new_team); - - if (new_team != SHMEM_TEAM_INVALID) { - t_pe = shmem_team_my_pe(new_team); - t_global = shmem_team_translate(new_team, t_pe, SHMEM_TEAM_WORLD); - - if (t_global != rank) { - shmem_global_exit(1); - } - } - - shmem_finalize(); - return 0; -} diff --git a/example_code/shmem_team_translate_pe.c b/example_code/shmem_team_translate_pe.c new file mode 100644 index 000000000..15aec0a6e --- /dev/null +++ b/example_code/shmem_team_translate_pe.c @@ -0,0 +1,32 @@ +#include <shmem.h> +#include <stdio.h> + +int main(void) +{ + int my_pe; + int n_pes; + int t_pe; + int t_global; + shmem_team_t new_team; + shmem_team_config_t *config; + + shmem_init(); + config = NULL; + my_pe = shmem_my_pe(); + n_pes = shmem_n_pes(); + + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, (n_pes + 1) / 2, + config, 0, &new_team); + + if (new_team != SHMEM_TEAM_INVALID) { + t_pe = shmem_team_my_pe(new_team); + t_global = shmem_team_translate_pe(new_team, t_pe, SHMEM_TEAM_WORLD); + + if (t_global != my_pe) { + shmem_global_exit(1); + } + } + + shmem_finalize(); + return 0; +}
From 0034139c09ad4f42451d9965fe1db9c627db9e51 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Thu, 13 Jun 2019 08:26:11 -0400 Subject: [PATCH 222/319] Revise: invalid teams and return values with shmem_team_get_config --- content/shmem_team_get_config.tex | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/content/shmem_team_get_config.tex b/content/shmem_team_get_config.tex index 0e9c60a84..f06c0c832 100644 --- a/content/shmem_team_get_config.tex +++ b/content/shmem_team_get_config.tex @@ -5,11 +5,11 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_team\_get\_config}@(shmem_team_t team, shmem_team_config_t *config); +int @\FuncDecl{shmem\_team\_get\_config}@(shmem_team_t team, shmem_team_config_t *config); \end{Csynopsis} \begin{apiarguments} - \apiargument{IN}{team}{A valid \openshmem team handle.} + \apiargument{IN}{team}{An \openshmem team handle.} \apiargument{OUT}{config}{ A pointer to the configuration parameters for the given team.} \end{apiarguments} @@ -19,12 +19,16 @@ the configuration parameters of the given team, which were assigned according to input configuration parameters when the team was created. -If the \VAR{team} argument does not specify a valid team, the behavior is -undefined. If \VAR{team} is equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then config will be set to the null pointer. +When \VAR{team} specifies an invalid team, if \VAR{team} compares +equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then no operation is +performed; otherwise, the behavior is undefined. } \apireturnvalues{ - None. + If \VAR{team} does not compare equal to + \LibConstRef{SHMEM\_TEAM\_INVALID}, then + \FUNC{shmem\_team\_get\_config} returns \CONST{0}; otherwise, + returns nonzero. } \apinotes{ From 10266ea0830262c4dd30882fce46d33091a03660 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Thu, 13 Jun 2019 11:46:28 -0400 Subject: [PATCH 223/319] Fix simple issues with shmem_team_split_strided --- content/shmem_team_split_strided.tex | 29 +++++++++++++--------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index c2eda7e6d..b03410189 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -7,15 +7,14 @@ \begin{Csynopsis} int @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, int PE_start, int PE_stride, - int PE_size, shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); + int PE_size, const shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); \end{Csynopsis} \begin{apiarguments} -\apiargument{IN}{parent\_team}{A valid \openshmem team. The predefined teams, such as -\LibHandleRef{SHMEM\_TEAM\_WORLD}, may be used, or any team created by the user.} +\apiargument{IN}{parent\_team}{An \openshmem team.} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the subset of \acp{PE} from -the parent team that will form the new team} +the parent team that will form the new team.} \apiargument{IN}{PE\_stride}{The stride between team \ac{PE} numbers in the parent team that comprise the subset of \acp{PE} that will form @@ -24,7 +23,7 @@ \apiargument{IN}{PE\_size}{The number of \acp{PE} from the parent team in the subset of \acp{PE} that will form the new team.} -\apiargument{INOUT}{config}{ +\apiargument{IN}{config}{ A pointer to the configuration parameters for the new team.} \apiargument{IN}{config\_mask}{ @@ -43,14 +42,13 @@ where the subset is defined by the \ac{PE} triplet (\VAR{PE\_start}, \VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the routine. -This routine must be called by all processes contained in the \ac{PE} triplet +This routine must be called by all \acp{PE} contained in the \ac{PE} triplet specification. 
It may be called by additional \acp{PE} not included in the -triplet specification, but for those processes a \VAR{new\_team} value of -\LibConstRef{SHMEM\_TEAM\_INVALID} is returned. All calling processes must provide the +triplet specification, but for those \acp{PE} a \VAR{new\_team} value of +\LibConstRef{SHMEM\_TEAM\_INVALID} is returned. All \acp{PE} must provide the same values for the \ac{PE} triplet. This routine will return a \VAR{new\_team} -containing the \ac{PE} subset specified by the triplet, and ordered by the -existing global \ac{PE} number. None of the parameters need to reside in -symmetric memory. +containing the \ac{PE} subset specified by the triplet and ordered by the +existing global \ac{PE} number. The \VAR{config} argument specifies team configuration parameters, which are described in Section~\ref{subsec:shmem_team_config_t}. @@ -62,11 +60,10 @@ See Section~\ref{subsec:shmem_team_config_t} for field mask names and default configuration parameters. -If \VAR{parent\_team} is an invalid team handle, the behavior is undefined. - -If \VAR{parent\_team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then no -new team will be created, and \VAR{new\_team} will be assigned the value -\LibConstRef{SHMEM\_TEAM\_INVALID}. +When \VAR{parent\_team} specifies an invalid team, if \VAR{parent\_team} +compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID}, then no new team +will be created and \VAR{new\_team} will be assigned the value +\LibConstRef{SHMEM\_TEAM\_INVALID}; otherwise, the behavior is undefined. If an invalid \ac{PE} triplet is provided, then the \VAR{new\_team} will not be created. From 8946910217515ecd5f6926385a3265fa7a821a57 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Thu, 13 Jun 2019 14:43:44 -0400 Subject: [PATCH 224/319] Add math notation for PE triple in team_split_strided --- content/shmem_team_split_strided.tex | 13 +++++++++++-- utils/packages.tex | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index b03410189..8388787c4 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -39,8 +39,17 @@ \apidescription{ The \FUNC{shmem\_team\_split\_strided} routine is a collective routine. It creates a new \openshmem team from a subset of the existing parent team, -where the subset is defined by the \ac{PE} triplet (\VAR{PE\_start}, -\VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the routine. +where the \ac{PE} subset is defined by the triplet of arguments +(\VAR{PE\_start}, \VAR{PE\_stride}, \VAR{PE\_size}). +A valid triplet is one such that: +\begin{equation*} + start + stride \cdot i \in \mathbb{Z}_N + \hspace{0.35em} + \forall + \hspace{0.35em} + i \in \mathbb{Z}_{size} +\end{equation*} +where $N$ is the number of \acp{PE} in the parent team. This routine must be called by all \acp{PE} contained in the \ac{PE} triplet specification. It may be called by additional \acp{PE} not included in the diff --git a/utils/packages.tex b/utils/packages.tex index 8ff6beef6..b5bad9622 100644 --- a/utils/packages.tex +++ b/utils/packages.tex @@ -7,6 +7,7 @@ \usepackage{float} \usepackage[usenames,dvipsnames]{color} \usepackage{amsmath} +\usepackage{amsfonts} \usepackage[table]{xcolor} \usepackage{xspace} \usepackage{xhfill} @@ -15,7 +16,6 @@ \usepackage{listings} % note sure after here \usepackage{makeidx} -\usepackage{amsmath} \usepackage[UKenglish]{isodate} \usepackage{ifthen} \usepackage{textcomp} From 6691bd32faed61f763cf9794bcdcecb6eb49141e Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Thu, 13 Jun 2019 14:47:16 -0400 Subject: [PATCH 225/319] Remove leading "PE_" from PE subset triplet --- content/shmem_team_split_strided.tex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 8388787c4..bfbc75354 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -1,26 +1,26 @@ \apisummary{ Create a new \openshmem team from a subset of the existing parent team \acp{PE}, where the subset is defined by the -\ac{PE} triplet (\VAR{PE\_start}, \VAR{PE\_stride}, and \VAR{PE\_size}) supplied to the routine.} +\ac{PE} triplet (\VAR{start}, \VAR{stride}, and \VAR{size}) supplied to the routine.} \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, int PE_start, int PE_stride, - int PE_size, const shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); +int @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, int start, int stride, + int size, const shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); \end{Csynopsis} \begin{apiarguments} \apiargument{IN}{parent\_team}{An \openshmem team.} -\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the subset of \acp{PE} from +\apiargument{IN}{start}{The lowest \ac{PE} number of the subset of \acp{PE} from the parent team that will form the new team.} -\apiargument{IN}{PE\_stride}{The stride between team \ac{PE} +\apiargument{IN}{stride}{The stride between team \ac{PE} numbers in the parent team that comprise the subset of \acp{PE} that will form the new team.} -\apiargument{IN}{PE\_size}{The number of \acp{PE} from the parent team in the subset +\apiargument{IN}{size}{The number of \acp{PE} from the parent team in the subset of \acp{PE} that will form the new team.} \apiargument{IN}{config}{ @@ -40,7 +40,7 @@ The \FUNC{shmem\_team\_split\_strided} 
routine is a collective routine. It creates a new \openshmem team from a subset of the existing parent team, where the \ac{PE} subset is defined by the triplet of arguments -(\VAR{PE\_start}, \VAR{PE\_stride}, \VAR{PE\_size}). +(\VAR{start}, \VAR{stride}, \VAR{size}). A valid triplet is one such that: \begin{equation*} start + stride \cdot i \in \mathbb{Z}_N @@ -86,7 +86,7 @@ \apinotes{ It is important to note the use of the less restrictive - \VAR{PE\_stride} argument instead of \VAR{logPE\_stride}. This method of + \VAR{stride} argument instead of \VAR{logPE\_stride}. This method of creating a team with an arbitrary set of \acp{PE} is inherently restricted by its parameters, but allows for many additional use-cases over using a \VAR{logPE\_stride} parameter, and may provide an easier transition for From 5c1a003b1476203ce68b8a156c8423205c81cdb7 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Thu, 13 Jun 2019 15:01:25 -0400 Subject: [PATCH 226/319] Minor fix for ctx-team association in shmem_ctx_create --- content/shmem_ctx_create.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_ctx_create.tex b/content/shmem_ctx_create.tex index b060b50a8..62ebf120c 100644 --- a/content/shmem_ctx_create.tex +++ b/content/shmem_ctx_create.tex @@ -29,7 +29,7 @@ options or after additional resources become available. \newtext{ - A newly created communication context has an initial association with the + A newly created communication context has a fixed association with the default team. All \openshmem routines that operate on this context will do so with respect to the associated \ac{PE} team. From 87f4f634e2028a9127b40baafdb49dbab90718d3 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Thu, 13 Jun 2019 15:20:12 -0400 Subject: [PATCH 227/319] Revise errors and UB for shmem_ctx_get_team --- content/shmem_ctx_get_team.tex | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/content/shmem_ctx_get_team.tex b/content/shmem_ctx_get_team.tex index 81f70cbdc..5fe1ea998 100644 --- a/content/shmem_ctx_get_team.tex +++ b/content/shmem_ctx_get_team.tex @@ -25,9 +25,12 @@ If \VAR{ctx} is the default context or one created by a call to \FUNC{shmem\_ctx\_create}, the returned team is the default team. - If \VAR{ctx} is an invalid context, the behavior is undefined. + When \VAR{ctx} is an invalid context, if \VAR{ctx} compares equal to + \LibConstRef{SHMEM\_CTX\_INVALID}, then \VAR{team} is assigned the + value \LibConstRef{SHMEM\_TEAM\_INVALID} and a nonzero value is + returned; otherwise, the behavior is undefined. - If \VAR{team} is a null pointer, a value of \CONST{-1} is returned. + If \VAR{team} is a null pointer, the behavior is undefined. } \apireturnvalues{ From 7646331320a0f25c5ceaf18f75b688fd544e1a36 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Thu, 13 Jun 2019 16:37:12 -0400 Subject: [PATCH 228/319] Initial revision of collectives introduction --- content/collective_intro.tex | 69 +++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 89aff6ddd..dda5afb2b 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -2,22 +2,24 @@ operations \oldtext{on} \newtext{performed by} a group of \acp{PE} \oldtext{called an active set}. {\color{Green} -\openshmem provides two types of collective routines: +\openshmem provides three types of collective routines: \begin{enumerate} \item Collective routines that operate on teams use a team handle parameter to determine -which \acp{PE} will participate in the routine, and use resources encapsulated by the team object -to perform operations. 
See Section~\ref{subsec:team} for details on team management. -These routines will be the standard for \openshmem moving forward. + which \acp{PE} will participate in the routine, and use resources encapsulated by the team object + to perform operations. See Section~\ref{subsec:team} for details on team management. + +\begin{DeprecateBlock} \item Collective routines that operate on active sets use a set of parameters to determine -which \acp{PE} will participate and what resources are used to perform operations. These routines -are the legacy API for collectives which are deprecated and will be phased out of -implementations moving forward. + which \acp{PE} will participate and what resources are used to perform operations. +\end{DeprecateBlock} + +\item Collective routines that accept neither team nor active set + parameters, which implicitly operate on the default team and, as + required, the default context. \end{enumerate} -Collective routines with no team or active set parameters are deprecated, -and implicitly operate on the team consisting of all \acp{PE} in the computation, -\LibHandleRef{SHMEM\_TEAM\_WORLD} +\subsubsection*{Team-based collectives} The team-based collective routines are performed with respect to a valid \openshmem team, which is specified by a team handle argument. @@ -33,10 +35,10 @@ immediately call another collective on that same team without any other intervening synchronization across the team. -While \openshmem routines provide thread safety as per the requesting threading model -(see \ref{subsec:thread_support}), the teams object itself is not thread-safe. It is up -to the program to ensure that on a given \ac{PE}, there are no simutanously calls to routines -on a given team object, including all team based collective calls. 
+While \openshmem routines provide thread support according to the +thread-support level provided at initialization (see +Section~\ref{subsec:thread_support}), team-based collective routines +may not be called simultaneously by multiple threads on a given team. Collective operations are matched across a given team based on ordering. So for a given team, collectives must occur in the same order across all PEs in a team. @@ -48,21 +50,20 @@ \item \FUNC{shmem\_\{TYPE\_\}broadcast\{mem\}} \item \FUNC{shmem\_\{TYPE\_\}collect\{mem\}} \item \FUNC{shmem\_\{TYPE\_\}fcollect\{mem\}} -\item Reduction routines for the following operations: AND, MAX, MIN, SUM, PROD, OR, XOR +\item Reduction routines for the following operations: AND, OR, XOR, MAX, MIN, SUM, PROD \item \FUNC{shmem\_\{TYPE\_\}alltoall\{mem\}} \item \FUNC{shmem\_\{TYPE\_\}alltoalls\{mem\}} \end{itemize} In addition, all team creation functions are collective operations. In addition to the ordering and thread safety requirements described here, there are additional synchronization requirements -on team creation operations. See section \ref{subsec:team} for more details. - -The deprecated function \FUNC{shmem\_sync\_all} is provided for backward compatibility to synchronize -all \acp{PE} in the computation. This should be replaced in applications by the equivalent -\FUNC{shmem\_sync(SHMEM\_TEAM\_WORLD)}. +on team creation operations. See Section~\ref{subsec:team} for more details. } \begin{DeprecateBlock} + +\subsubsection*{\newtext{Active-set-based collectives}} + The \newtext{active-set-based} collective routines require all \acp{PE} in the active set to simultaneously call the routine. 
A \ac{PE} that is not in the active set calling the collective @@ -101,9 +102,9 @@ routines defined in the \openshmem Specification are: \begin{itemize} -\item \FUNC{shmem\_barrier\_all} +\item \oldtext{\FUNC{shmem\_barrier\_all}} \item \FUNC{shmem\_barrier} -\item \FUNC{shmem\_sync\_all} +\item \oldtext{\FUNC{shmem\_sync\_all}} \item \FUNC{shmem\_sync} \item \FUNC{shmem\_broadcast\{32, 64\}} \item \FUNC{shmem\_collect\{32, 64\}} @@ -113,15 +114,19 @@ \item \FUNC{shmem\_alltoalls\{32, 64\}} \end{itemize} +\end{DeprecateBlock} + {\color{Green} -The active-set-based \FUNC{shmem\_barrier} and routine has been deprecated and -no team-based barrier routines will be defined. In future, the behavior -previously provided by \FUNC{shmem\_barrier} should be realized by first calling -\FUNC{shmem\_ctx\_quiet} on any relevant communication contexts followed by a call -to \FUNC{shmem\_sync} by some \openshmem team. - -Calls to \FUNC{shmem\_barrier\_all} -should be replaced with a call to quiet the default communication context followed -by a call to \FUNC{shmem\_sync} by \LibHandleRef{SHMEM\_TEAM\_WORLD}. + +\subsubsection*{Team-implicit collectives} + +The \FUNC{shmem\_sync\_all} routine synchronizes all \acp{PE} in the +computation through the default team. This routine is equivalent to a +call to \FUNC{shmem\_team\_sync} on the default team. + +The \FUNC{shmem\_barrier\_all} routine synchronizes all \acp{PE} in +the default team and ensures completion of all local and remote memory +updates issued via the default context. This routine is equivalent to +a call to \FUNC{shmem\_ctx\_quiet} on the default context followed by a +call to \FUNC{shmem\_team\_sync} on the default team. } -\end{DeprecateBlock} From 5bfa27c468ce08dfc12404b92de593dec94344e6 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Thu, 13 Jun 2019 16:53:57 -0400 Subject: [PATCH 229/319] Revise team/ctx-specific stuff in RMA intro --- content/rma_intro.tex | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/content/rma_intro.tex b/content/rma_intro.tex index 479ca843f..0ba8223a4 100644 --- a/content/rma_intro.tex +++ b/content/rma_intro.tex @@ -16,14 +16,16 @@ routine, \GET{}, the origin \ac{PE} provides the \dest{} data object and the destination \ac{PE} provides the \source{} data object. +{ \color{Green} The destination \ac{PE} is specified as an integer representing the \ac{PE} number. -This \ac{PE} number is relative to the \openshmem team associated with the -communication context being using for the operation. If no context is passed to the routine, -then the \ac{PE} number is relative to the default team, and is the global \ac{PE} -number. If the \ac{PE} number passed to the routine is invalid, being negative -or greater than or equal to the size of the \openshmem team, then routine behavior is undefined. -\color{Black} +This \ac{PE} number is relative to the team associated with the +communication context being used for the operation. If no context argument is passed to the routine, +then the routine operates on the default context, which implies that +the \ac{PE} number is relative to the default team. +If the \ac{PE} number passed to the routine is invalid, being negative +or greater than or equal to the size of the \openshmem team, then the behavior is undefined. +} Where appropriate compiler support is available, \openshmem provides type-generic one-sided communication interfaces via \Cstd[11] generic selection From 9efe2a21781e2ed1f1f782788e0f826991f0c91f Mon Sep 17 00:00:00 2001 From: "Nicholas S.
Park" Date: Thu, 13 Jun 2019 16:59:49 -0400 Subject: [PATCH 230/319] Remove outdated note about "locally created" contexts --- content/shmem_ctx_destroy.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_ctx_destroy.tex b/content/shmem_ctx_destroy.tex index c0607d0a4..553e5e3aa 100644 --- a/content/shmem_ctx_destroy.tex +++ b/content/shmem_ctx_destroy.tex @@ -1,5 +1,5 @@ \apisummary{ - Destroy a \newtext{locally created} communication context. + Destroy a communication context. } \begin{apidefinition} From 875e10f4014432d0ce1268880e837652a21d1fbd Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Wed, 19 Jun 2019 12:31:28 -0500 Subject: [PATCH 231/319] Fix links for reference in put-with-signal defn --- content/shmem_signal_wait_until.tex | 2 +- main_spec.tex | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex index cd0064c43..4afde58e9 100644 --- a/content/shmem_signal_wait_until.tex +++ b/content/shmem_signal_wait_until.tex @@ -25,7 +25,7 @@ satisfies the wait condition. In an \openshmem program with single-threaded or multithreaded \acp{PE}, the \VAR{sig\_addr} object at the calling \ac{PE} is expected only to be updated as a signal, through the signalling operations - available in Section~\ref{subsec:shmem_signal}. + available in Section~\ref{sec:shmem_signal}. This routine can be used to implement point-to-point synchronization between \acp{PE} or between threads within the same \ac{PE}. 
A call to this routine diff --git a/main_spec.tex b/main_spec.tex index 6b17b65b5..665b3c2bd 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -189,7 +189,7 @@ \subsubsection{\textbf{SHMEM\_GET\_NBI}}\label{subsec:shmem_get_nbi} \input{content/shmem_get_nbi.tex} -\subsection{Signalling Operations}\label{sec:signal} +\subsection{Signalling Operations}\label{sec:shmem_signal} \subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} \input{content/shmem_put_signal.tex} @@ -322,6 +322,9 @@ \subsubsection{\textbf{SHMEM\_TEST\_ANY}}\label{subsec:shmem_test_any} \subsubsection{\textbf{SHMEM\_TEST\_SOME}}\label{subsec:shmem_test_some} \input{content/shmem_test_some.tex} +\subsubsection{\textbf{SHMEM\_SIGNAL\_WAIT\_UNTIL}}\label{subsec:shmem_signal_wait_until} +\input{content/shmem_signal_wait_until.tex} + From a7154c4089cac16f3b3c582750c5693e67a60b2b Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Thu, 20 Jun 2019 11:47:38 -0500 Subject: [PATCH 232/319] Add shmem_signal_fetch --- content/library_constants.tex | 8 +++---- content/shmem_put_signal.tex | 19 +++++++++------ content/shmem_put_signal_nbi.tex | 18 ++++++++++----- content/shmem_signal_fetch.tex | 36 +++++++++++++++++++++++++++++ content/shmem_signal_wait_until.tex | 3 ++- main_spec.tex | 21 ++++++++++------- 6 files changed, 79 insertions(+), 26 deletions(-) create mode 100644 content/shmem_signal_fetch.tex diff --git a/content/library_constants.tex b/content/library_constants.tex index dfbcba344..df9835ca3 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -70,16 +70,16 @@ \color{ForestGreen} \LibConstDecl{SHMEM\_SIGNAL\_SET} & \color{ForestGreen} -An integer constant expression corresponding to the signal update operation of -type set. See Section~\ref{subsec:shmem_put_signal} and +An integer constant expression corresponding to the signal update set operation. 
+See Section~\ref{subsec:shmem_put_signal} and Section~\ref{subsec:shmem_put_signal_nbi} for more detail about its use. \tabularnewline \hline %% \color{ForestGreen} \LibConstDecl{SHMEM\_SIGNAL\_ADD} & \color{ForestGreen} -An integer constant expression corresponding to the signal update operation of -type add. See Section~\ref{subsec:shmem_put_signal} and +An integer constant expression corresponding to the signal update add operation. +See Section~\ref{subsec:shmem_put_signal} and Section~\ref{subsec:shmem_put_signal_nbi} for more detail about its use. \tabularnewline \hline %% diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index f632db851..6598d18f2 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -62,13 +62,18 @@ corresponding \dest{} data words into the data object on the remote \ac{PE}. An update to the \VAR{sig\_addr} signal data object through a put-with-signal - routine completes as if performed atomically with respect to any other - blocking or non-blocking variant of the put-with-signal routine that updates - the \VAR{sig\_addr} signal data object using the same \VAR{sig\_op} signal - update operator and any point-to-point synchronization routine that accesses - the \VAR{sig\_addr} signal data object. With the above described atomicity - guarantees, the following options can be used as the \VAR{sig\_op} signal - operator. + routine completes as if performed atomically with respect to the following: + \begin{itemize} + \item other blocking or non-blocking variant of the put-with-signal + routine that updates the \VAR{sig\_addr} signal data object using the + same \VAR{sig\_op} signal update operator, + \item \FUNC{shmem\_signal\_fetch} routine that fetches the + \VAR{sig\_addr} signal data object on the specified remote \ac{PE}, and + \item any point-to-point synchronization routine that accesses the + \VAR{sig\_addr} signal data object. 
+ \end{itemize} + With the above described atomicity guarantees, the following options can be + used as the \VAR{sig\_op} signal operator. \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to \VAR{sig\_addr} signal data object is an atomic set operation. It writes the \VAR{signal} diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index efd242ef1..b2f195ba8 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -71,12 +71,18 @@ An update to the \VAR{sig\_addr} signal data object through a non-blocking put-with-signal routine completes as if performed atomically with respect to - any other blocking or non-blocking variant of the put-with-signal routine - that updates the \VAR{sig\_addr} signal data object using the same - \VAR{sig\_op} signal update operator and any point-to-point synchronization - routine that accesses the \VAR{sig\_addr} signal data object. With the above - described atomicity guarantees, the following options can be used as the - \VAR{sig\_op} signal operator. + the following: + \begin{itemize} + \item other blocking or non-blocking variant of the put-with-signal + routine that updates the \VAR{sig\_addr} signal data object using the + same \VAR{sig\_op} signal update operator, + \item \FUNC{shmem\_signal\_fetch} routine that fetches the + \VAR{sig\_addr} signal data object on the specified remote \ac{PE}, and + \item any point-to-point synchronization routine that accesses the + \VAR{sig\_addr} signal data object. + \end{itemize} + With the above described atomicity guarantees, the following options can be + used as the \VAR{sig\_op} signal operator. \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to \VAR{sig\_addr} signal data object is an atomic set operation. 
It writes the \VAR{signal} diff --git a/content/shmem_signal_fetch.tex b/content/shmem_signal_fetch.tex new file mode 100644 index 000000000..233e49dff --- /dev/null +++ b/content/shmem_signal_fetch.tex @@ -0,0 +1,36 @@ +\color{ForestGreen} +\apisummary{ + Atomically fetches the signal update on a local data object. +} + +\begin{apidefinition} + +\begin{Csynopsis} +uint64_t @\FuncDecl{shmem\_signal\_fetch}@(const TYPE *sig_addr); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{sig\_addr}{A pointer to a remotely accessible variable.} +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_signal\_fetch} performs an atomic fetch operation and returns + the contents of the \VAR{sig\_addr} signal data object as an atomic operation. + In an \openshmem program with single-threaded or multithreaded \acp{PE}, the + \VAR{sig\_addr} object at the calling \ac{PE} is expected only to be updated + as a signal, through the signalling operations available in + Section~\ref{subsec:shmem_put_signal} and + Section~\ref{subsec:shmem_put_signal_nbi}. +} + +\apireturnvalues{ + Returns the contents of the symmetric data object, \VAR{sig\_addr}, at the + calling \ac{PE}. +} + +\apinotes{ + None. +} + +\end{apidefinition} +\color{Black} diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex index 4afde58e9..d00947205 100644 --- a/content/shmem_signal_wait_until.tex +++ b/content/shmem_signal_wait_until.tex @@ -25,7 +25,8 @@ satisfies the wait condition. In an \openshmem program with single-threaded or multithreaded \acp{PE}, the \VAR{sig\_addr} object at the calling \ac{PE} is expected only to be updated as a signal, through the signalling operations - available in Section~\ref{sec:shmem_signal}. + available in Section~\ref{subsec:shmem_put_signal} and + Section~\ref{subsec:shmem_put_signal_nbi}. This routine can be used to implement point-to-point synchronization between \acp{PE} or between threads within the same \ac{PE}. 
A call to this routine diff --git a/main_spec.tex b/main_spec.tex index 665b3c2bd..285997c69 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -189,14 +189,6 @@ \subsubsection{\textbf{SHMEM\_GET\_NBI}}\label{subsec:shmem_get_nbi} \input{content/shmem_get_nbi.tex} -\subsection{Signalling Operations}\label{sec:shmem_signal} - -\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} -\input{content/shmem_put_signal.tex} - -\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL\_NBI}}\label{subsec:shmem_put_signal_nbi} -\input{content/shmem_put_signal_nbi.tex} - \subsection{Atomic Memory Operations}\label{sec:amo} \input{content/atomics_intro} @@ -260,6 +252,19 @@ \subsubsection{\textbf{SHMEM\_ATOMIC\_XOR}} +\subsection{Signalling Operations}\label{sec:shmem_signal} + +\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} +\input{content/shmem_put_signal.tex} + +\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL\_NBI}}\label{subsec:shmem_put_signal_nbi} +\input{content/shmem_put_signal_nbi.tex} + +\subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} +\input{content/shmem_signal_fetch.tex} + + + \subsection{Collective Routines}\label{subsec:coll} \input{content/collective_intro.tex} From 428406bbe73a15458b197348c103599677b024ee Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Thu, 27 Jun 2019 16:30:32 -0400 Subject: [PATCH 233/319] Match type size in reduce example, add finalize() Signed-off-by: David M. 
Ozog --- example_code/shmem_reduce_example.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/example_code/shmem_reduce_example.c b/example_code/shmem_reduce_example.c index 3c2c7a0b8..68977a0e4 100644 --- a/example_code/shmem_reduce_example.c +++ b/example_code/shmem_reduce_example.c @@ -23,15 +23,15 @@ int main(void) int npes = shmem_n_pes(); size_t num = 32; - long *values = shmem_malloc(num * sizeof(int)); - long *sums = shmem_malloc(num * sizeof(int)); - - unsigned char *valid_me = shmem_malloc(num * sizeof(int)); - unsigned char *valid_all = shmem_malloc(num * sizeof(int)); + long *values = shmem_malloc(num * sizeof(long)); + long *sums = shmem_malloc(num * sizeof(long)); + + unsigned char *valid_me = shmem_malloc(num * sizeof(unsigned char)); + unsigned char *valid_all = shmem_malloc(num * sizeof(unsigned char)); values[0] = recv_a_value((unsigned)me, npes); valid_me[0] = is_valid(values[0], npes); - + for (int i=1; i < num; i++) { values[i] = recv_a_value((unsigned)values[i-1], npes); valid_me[i] = is_valid(values[i], npes); @@ -58,5 +58,7 @@ int main(void) printf ("[%d] = invalid on one or more pe\n", i); } } + + shmem_finalize(); + return 0; } - From 27232a5197f78eece986224574f4a14a12331878 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 8 Jul 2019 14:10:39 -0400 Subject: [PATCH 234/319] Add advice to implementors to wait --- content/shmem_wait_until.tex | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index d96b7118d..ad2d8e9b8 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -90,6 +90,16 @@ where \VAR{cmp} is \CONST{SHMEM\_CMP\_NE}. } +\apiimpnotes{ + Some platforms may allow wait operations to efficiently poll or block on an + update to \VAR{ivar}. On others, an atomic-compatible read operation may be + needed to preserve the atomicity of updates to \VAR{ivar}. 
On platforms + where atomic-compatible read operations negatively impact performance, + implementations may be able to limit the number of atomic-compatible reads + performed by using non-atomic reads of \VAR{ivar} to wait for a change to + occur, followed by an atomic-compatible read operation to fetch the updated + value. +} \begin{apiexamples} From 714439393706acefbda6c5fc778265bcaaeff3ef Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 8 Jul 2019 14:26:44 -0400 Subject: [PATCH 235/319] Add AMO and read fence text to wait/test multiple APIs Signed-off-by: James Dinan --- content/shmem_test_all.tex | 17 +++++++++++------ content/shmem_test_any.tex | 25 +++++++++++++++---------- content/shmem_test_some.tex | 23 ++++++++++++++--------- content/shmem_wait_until_all.tex | 25 ++++++++++++------------- content/shmem_wait_until_any.tex | 19 +++++++++---------- content/shmem_wait_until_some.tex | 19 +++++++++---------- 6 files changed, 70 insertions(+), 58 deletions(-) diff --git a/content/shmem_test_all.tex b/content/shmem_test_all.tex index 601cf4f3b..ee044f0f3 100644 --- a/content/shmem_test_all.tex +++ b/content/shmem_test_all.tex @@ -34,12 +34,14 @@ \apidescription{ The \FUNC{shmem\_test\_all} routine indicates whether all entries in the test set specified by \VAR{ivars} and \VAR{status} have satisfied the test - condition at the calling \ac{PE}. This routine does not block and returns zero if - not all entries in \VAR{ivars} satisfied the test condition. This routine - compares each of the \VAR{nelems} elements in the \VAR{ivars} array with - the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} - at the calling \ac{PE}. - If \VAR{nelems} is 0, the test set is empty and this routine returns 1. + condition at the calling \ac{PE}. The \VAR{ivar} object at the calling + \ac{PE} may be updated by an \ac{AMO} performed by a thread located within + the calling \ac{PE} or within another \ac{PE}. 
This routine does not block + and returns zero if not all entries in \VAR{ivars} satisfied the test + condition. This routine compares each of the \VAR{nelems} elements in the + \VAR{ivars} array with the value \VAR{cmp\_value} according to the + comparison operator \VAR{cmp} at the calling \ac{PE}. If \VAR{nelems} is + 0, the test set is empty and this routine returns 1. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates whether @@ -50,6 +52,9 @@ ignored and all elements in \VAR{ivars} are included in the test set. The \VAR{ivars}, \VAR{indices}, and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_test\_all} does not return 1 + before the update of the memory indicated by \VAR{ivar} is fully complete. } \apireturnvalues{ diff --git a/content/shmem_test_any.tex b/content/shmem_test_any.tex index bd2462242..217c30498 100644 --- a/content/shmem_test_any.tex +++ b/content/shmem_test_any.tex @@ -33,16 +33,18 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_test\_any} routine indicates whether any entry in the - test set specified by \VAR{ivars} and \VAR{status} has satisfied the test - condition at the calling \ac{PE}. This routine does not block and returns \CONST{SIZE\_MAX} if - no entries in \VAR{ivars} satisfied the test condition. This routine - compares each of the \VAR{nelems} elements in the \VAR{ivars} array with - the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} - at the calling \ac{PE}. The order in which these elements are tested is - unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies - the test condition, a series of calls to \FUNC{shmem\_test\_any} must - eventually return $i$. 
+ The \FUNC{shmem\_test\_any} routine indicates whether any entry in the test + set specified by \VAR{ivars} and \VAR{status} has satisfied the test + condition at the calling \ac{PE}. The \VAR{ivar} object at the calling + \ac{PE} may be updated by an \ac{AMO} performed by a thread located within + the calling \ac{PE} or within another \ac{PE}. This routine does not block + and returns \CONST{SIZE\_MAX} if no entries in \VAR{ivars} satisfied the + test condition. This routine compares each of the \VAR{nelems} elements in + the \VAR{ivars} array with the value \VAR{cmp\_value} according to the + comparison operator \VAR{cmp} at the calling \ac{PE}. The order in which + these elements are tested is unspecified. If an entry $i$ in \VAR{ivars} + within the test set satisfies the test condition, a series of calls to + \FUNC{shmem\_test\_any} must eventually return $i$. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates whether @@ -53,6 +55,9 @@ null pointer, it is ignored and all elements in \VAR{ivars} are included in the test set. The \VAR{ivars} and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_test\_any} does not return an + index before the update of the memory indicated by the corresponding \VAR{ivars} element is fully complete. } \apireturnvalues{ diff --git a/content/shmem_test_some.tex b/content/shmem_test_some.tex index e619e293a..4f26d6c58 100644 --- a/content/shmem_test_some.tex +++ b/content/shmem_test_some.tex @@ -37,15 +37,17 @@ \apidescription{ The \FUNC{shmem\_test\_some} routine indicates whether at least one entry in the test set specified by \VAR{ivars} and \VAR{status} satisfies the - test condition at the calling \ac{PE}. This routine does not block and returns zero if - no entries in \VAR{ivars} satisfied the test condition.
This routine - compares each element of the \VAR{ivars} array in the test set with the - value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} at - the calling \ac{PE}. This routine tests all elements of \VAR{ivars} in the - test set at least once, and the order in which the elements are tested is - unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies - the test condition, a series of calls to \FUNC{shmem\_test\_some} must - eventually return $i$. + test condition at the calling \ac{PE}. The \VAR{ivar} object at the + calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located + within the calling \ac{PE} or within another \ac{PE}. This routine does + not block and returns zero if no entries in \VAR{ivars} satisfied the test + condition. This routine compares each element of the \VAR{ivars} array in + the test set with the value \VAR{cmp\_value} according to the comparison + operator \VAR{cmp} at the calling \ac{PE}. This routine tests all elements + of \VAR{ivars} in the test set at least once, and the order in which the + elements are tested is unspecified. If an entry $i$ in \VAR{ivars} within + the test set satisfies the test condition, a series of calls to + \FUNC{shmem\_test\_some} must eventually return $i$. Upon return, the \VAR{indices} array contains the indices of the elements in the test set that satisfied the test condition during the call to @@ -69,6 +71,9 @@ empty and this routine returns 0. If \VAR{status} is a null pointer, it is ignored and all elements in \VAR{ivars} are included in the test set. The \VAR{ivars}, \VAR{indices}, and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_test\_some} does not return a + nonzero value before the updates of the memory indicated by the corresponding \VAR{ivars} elements are fully complete.
} \apireturnvalues{ diff --git a/content/shmem_wait_until_all.tex b/content/shmem_wait_until_all.tex index 8b947e099..ff1b529b5 100644 --- a/content/shmem_wait_until_all.tex +++ b/content/shmem_wait_until_all.tex @@ -33,12 +33,14 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_all} routine waits until all entries in the - wait set specified by \VAR{ivars} and \VAR{status} have satisfied the wait condition at the - calling \ac{PE}. If \VAR{nelems} is 0, the wait set is empty and this routine returns immediately. - This routine is semantically similar to - \FUNC{shmem\_wait\_until} in Section~\ref{subsec:shmem_wait_until}, but - adds support for point-to-point synchronization involving an array of - symmetric data objects. + wait set specified by \VAR{ivars} and \VAR{status} have satisfied the wait + condition at the calling \ac{PE}. The \VAR{ivar} object at the calling + \ac{PE} may be updated by an \ac{AMO} performed by a thread located within + the calling \ac{PE} or within another \ac{PE}. If \VAR{nelems} is 0, the + wait set is empty and this routine returns immediately. This routine is + semantically similar to \FUNC{shmem\_wait\_until} in + Section~\ref{subsec:shmem_wait_until}, but adds support for point-to-point + synchronization involving an array of symmetric data objects. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates @@ -49,6 +51,10 @@ immediately. If \VAR{status} is a null pointer, it is ignored and all elements in \VAR{ivars} are included in the wait set. The \VAR{ivars} and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_wait\_until\_all} does not + return before the update of the memory indicated by \VAR{ivar} is fully + complete. } @@ -60,13 +66,6 @@ None. 
} -\apiimpnotes{ - Implementations must ensure that \FUNC{shmem\_wait\_until\_all} does not - return before the update of the memory indicated by \VAR{ivars} is fully - complete. Partial updates to the memory must not cause - \FUNC{shmem\_wait\_until\_all} to return. -} - \begin{apiexamples} \apicexample diff --git a/content/shmem_wait_until_any.tex b/content/shmem_wait_until_any.tex index 349c1ae75..063978bbe 100644 --- a/content/shmem_wait_until_any.tex +++ b/content/shmem_wait_until_any.tex @@ -35,9 +35,11 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_any} routine waits until any one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait - condition at the calling \ac{PE}. The order in which these elements are - waited upon is unspecified. If an entry $i$ in \VAR{ivars} within the wait - set satisfies the wait condition, a series of calls to + condition at the calling \ac{PE}. The \VAR{ivar} object at the calling + \ac{PE} may be updated by an \ac{AMO} performed by a thread located within + the calling \ac{PE} or within another \ac{PE}. The order in which these + elements are waited upon is unspecified. If an entry $i$ in \VAR{ivars} + within the wait set satisfies the wait condition, a series of calls to \FUNC{shmem\_wait\_until\_any} must eventually return $i$. The optional \VAR{status} is a mask array of length \VAR{nelems} where each @@ -50,6 +52,10 @@ \VAR{status} is a null pointer, it is ignored and all elements in \VAR{ivars} are included in the wait set. The \VAR{ivars} and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_wait\_until\_any} does not + return before the update of the memory indicated by \VAR{ivar} is fully + complete. } \apireturnvalues{ @@ -62,13 +68,6 @@ None. } -\apiimpnotes{ - Implementations must ensure that \FUNC{shmem\_wait\_until\_any} does not - return before the update of the memory indicated by the completed index of \VAR{ivars} is fully - executed. 
Partial updates to the memory must not cause - \FUNC{shmem\_wait\_until\_any} to return. -} - \begin{apiexamples} \apicexample diff --git a/content/shmem_wait_until_some.tex b/content/shmem_wait_until_some.tex index 2729f09ba..bfbf76f8c 100644 --- a/content/shmem_wait_until_some.tex +++ b/content/shmem_wait_until_some.tex @@ -37,9 +37,11 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_some} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the - wait condition at the calling \ac{PE}. This routine tests all elements of - \VAR{ivars} in the wait set at least once, and the order in which the - elements are waited upon is unspecified. + wait condition at the calling \ac{PE}. The \VAR{ivar} object at the + calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located + within the calling \ac{PE} or within another \ac{PE}. This routine tests + all elements of \VAR{ivars} in the wait set at least once, and the order in + which the elements are waited upon is unspecified. Upon return, the \VAR{indices} array contains the indices of at least one element in the wait set that satisfied the wait condition during the call @@ -66,6 +68,10 @@ and all elements in \VAR{ivars} are included in the wait set. The \VAR{ivars}, \VAR{indices}, and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_wait\_until\_some} does not + return before the update of the memory indicated by \VAR{ivar} is fully + complete. } @@ -78,13 +84,6 @@ None. } -\apiimpnotes{ - Implementations must ensure that \FUNC{shmem\_wait\_until\_some} does not - return before the update of the memory indicated by the completed indices of \VAR{ivars} is fully - executed. Partial updates to the memory must not cause - \FUNC{shmem\_wait\_until\_some} to return. 
-} - \begin{apiexamples} \apicexample From acc1b54273e867eae8131cedc908b0d938479576 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 8 Jul 2019 14:42:48 -0400 Subject: [PATCH 236/319] Add changelog entries --- content/backmatter.tex | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 31bba3188..46c6e0f03 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -510,6 +510,15 @@ \section{Version 1.5} The following list describes the specific changes in \openshmem[1.5]: \begin{itemize} % +\item Clarified that point-to-point synchronization routines preserve the + atomicity of OpenSHMEM \acp{AMO}. +\\ See Section~\ref{subsec:amo_guarantees}. +% +\item Clarified that symmetric variables used as \VAR{ivar} arguments to + point-to-point synchronization routines must be updated using OpenSHMEM + \acp{AMO}. +\\ See Section~\ref{subsec:p2p_intro}. +% \item Added support for multipliers in \VAR{SHMEM\_SYMMETRIC\_SIZE} environment variables. \\ See Section \ref{subsec:environment_variables}. From bdedc543bfb62ec7038f7e5628e932c25ea95828 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 8 Jul 2019 19:19:57 -0500 Subject: [PATCH 237/319] Change point-to-point phrasing in teams intro --- content/teams_intro.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/teams_intro.tex b/content/teams_intro.tex index be736b8fd..de3d730b7 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -1,11 +1,11 @@ The \acp{PE} in an \openshmem program communicate using either -point-to-point routines, which specify the \ac{PE} number of the target +\ac{RMA} and \ac{AMO} routines, which specify the \ac{PE} number of the target \ac{PE}, or collective routines, which operate over a set of \acp{PE}. In \openshmem, teams allow programs to group a set of \acp{PE} for communication. Team-based collective communications operate across all the \acp{PE} in a valid team. 
-Point-to-point communication can make use of team-relative \ac{PE} +\ac{RMA} and \ac{AMO} communication can make use of team-relative \ac{PE} numbering through team-based contexts (see Section~\ref{sec:ctx}) or \ac{PE} number translation. From a7d13d8703c0877f932e219b5931a9173aeb920e Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 8 Jul 2019 19:34:19 -0500 Subject: [PATCH 238/319] Cleanup team creation discussion in intro --- content/teams_intro.tex | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/content/teams_intro.tex b/content/teams_intro.tex index de3d730b7..baea3d152 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -74,7 +74,12 @@ \subsubsection*{Team Creation} Team creation is a collective operation on the parent team object. New teams result from a \FUNC{shmem\_team\_split\_*} routine, which takes a parent team and other arguments and produces new teams that are a subset of the parent -team. Teams that are created by a \FUNC{shmem\_team\_split\_*} routine may be +team. All \acp{PE} in a parent team must participate in a split operation +to create new teams. If a \ac{PE} from the parent team is not a member of any +resulting new teams, it will receive a value of \CONST{SHMEM\_TEAM\_INVALID} +as the value for the new team handle. + +Teams that are created by a \FUNC{shmem\_team\_split\_*} routine may be provided a configuration argument that specifies attributes of each new team. This configuration argument is of type \CTYPE{shmem\_team\_config\_t}, which is detailed further in Section~\ref{subsec:shmem_team_config_t}. @@ -85,18 +90,8 @@ \subsubsection*{Team Creation} As with any collective routine on a team, team creation is matched across PEs based on ordering. So, team creation events must occur in the same order on all \acp{PE} -in the parent team. 
Additionally, there must not be team creation -operations from the same parent team simultaneously occurring that involve -the same \acp{PE} in any resulting child teams. - -\begin{itemize} -\item[] The following rule of practice will avoid any conflicts on team -object resources during team creation: -\item[] \emph{When a parent team is split multiple times, and the resulting child teams -have overlapping membership, the program must call the \FUNC{shmem\_team\_sync} -routine on the parent team between subsequent calls to split routines.} -\end{itemize} - -Upon completion of a team creation operation, any resulting child teams will be -immediately usable for any team-based operations, including creating new child teams, -without any intervening synchronization. +in the parent team. + +Upon completion of a team creation operation, the parent and any resulting child teams +will be immediately usable for any team-based operations, including creating new child +teams, without any intervening synchronization. From 3da9243be443641ba233142dc9aff6865a91c78f Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 8 Jul 2019 19:42:57 -0500 Subject: [PATCH 239/319] Update put-with-signal proposal The following changes are made: 1. Create a new section for generic put-with-signal atomicity 2. Create a new section for signal-operator explanation 3. 
Cleanup fetch-signal and put-with-signal operation descriptions --- content/shmem_put_signal.tex | 29 +++++------------ content/shmem_put_signal_nbi.tex | 24 +++------------ content/shmem_signal_fetch.tex | 17 +++++----- content/shmem_signal_wait_until.tex | 13 ++++---- main_spec.tex | 48 +++++++++++++++++++++++++++-- 5 files changed, 71 insertions(+), 60 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 6598d18f2..84ff894d7 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -39,8 +39,8 @@ \apiargument{IN}{source}{Data object containing the data to be copied.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} - \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote - \ac{PE} as the signal. This signal data object must be remotely accessible.} + \apiargument{OUT}{sig\_addr}{signal data object to be updated on the remote + \ac{PE} as a signal. This signal data object must be remotely accessible.} \apiargument{IN}{signal}{Unsigned 64-bit value that is used for updating the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{sig\_op}{Signal operator that represents the type of update @@ -61,26 +61,11 @@ \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. 
- An update to the \VAR{sig\_addr} signal data object through a put-with-signal - routine completes as if performed atomically with respect to the following: - \begin{itemize} - \item other blocking or non-blocking variant of the put-with-signal - routine that updates the \VAR{sig\_addr} signal data object using the - same \VAR{sig\_op} signal update operator, - \item \FUNC{shmem\_signal\_fetch} routine that fetches the - \VAR{sig\_addr} signal data object on the specified remote \ac{PE}, and - \item any point-to-point synchronization routine that accesses the - \VAR{sig\_addr} signal data object. - \end{itemize} - With the above described atomicity guarantees, the following options can be - used as the \VAR{sig\_op} signal operator. - - \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to \VAR{sig\_addr} - signal data object is an atomic set operation. It writes the \VAR{signal} - value into \VAR{sig\_addr} on \VAR{PE} as an atomic operation.} - \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_ADD}}{An update to \VAR{sig\_addr} - signal data object is an atomic add operation. It adds the \VAR{signal} - value into \VAR{sig\_addr} on \VAR{PE} as an atomic operation.} + An update to the \VAR{sig\_addr} signal data object through a + put-with-signal routine completes as if performed atomically as described in + Section~\ref{subsec:signal_atomicity}. The various options as described in + Section~\ref{subsec:signal_operator} can be used as the \VAR{sig\_op} signal + operator. } \apireturnvalues{ diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index b2f195ba8..def87af14 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -70,26 +70,10 @@ performed on the remote \VAR{sig\_addr} signal data object. 
An update to the \VAR{sig\_addr} signal data object through a non-blocking - put-with-signal routine completes as if performed atomically with respect to - the following: - \begin{itemize} - \item other blocking or non-blocking variant of the put-with-signal - routine that updates the \VAR{sig\_addr} signal data object using the - same \VAR{sig\_op} signal update operator, - \item \FUNC{shmem\_signal\_fetch} routine that fetches the - \VAR{sig\_addr} signal data object on the specified remote \ac{PE}, and - \item any point-to-point synchronization routine that accesses the - \VAR{sig\_addr} signal data object. - \end{itemize} - With the above described atomicity guarantees, the following options can be - used as the \VAR{sig\_op} signal operator. - - \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to \VAR{sig\_addr} - signal data object is an atomic set operation. It writes the \VAR{signal} - value into \VAR{sig\_addr} on \VAR{PE} as an atomic operation.} - \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_ADD}}{An update to \VAR{sig\_addr} - signal data object is an atomic add operation. It adds the \VAR{signal} - value into \VAR{sig\_addr} on \VAR{PE} as an atomic operation.} + put-with-signal routine completes as if performed atomically as described in + Section~\ref{subsec:signal_atomicity}. The various options as described in + Section~\ref{subsec:signal_operator} can be used as the \VAR{sig\_op} signal + operator. } \apireturnvalues{ diff --git a/content/shmem_signal_fetch.tex b/content/shmem_signal_fetch.tex index 233e49dff..c838742a7 100644 --- a/content/shmem_signal_fetch.tex +++ b/content/shmem_signal_fetch.tex @@ -1,12 +1,12 @@ \color{ForestGreen} \apisummary{ - Atomically fetches the signal update on a local data object. + Fetches the signal update on a local data object. 
} \begin{apidefinition} \begin{Csynopsis} -uint64_t @\FuncDecl{shmem\_signal\_fetch}@(const TYPE *sig_addr); +uint64_t @\FuncDecl{shmem\_signal\_fetch}@(const uint64_t *sig_addr); \end{Csynopsis} \begin{apiarguments} @@ -14,17 +14,14 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_signal\_fetch} performs an atomic fetch operation and returns - the contents of the \VAR{sig\_addr} signal data object as an atomic operation. - In an \openshmem program with single-threaded or multithreaded \acp{PE}, the - \VAR{sig\_addr} object at the calling \ac{PE} is expected only to be updated - as a signal, through the signalling operations available in - Section~\ref{subsec:shmem_put_signal} and - Section~\ref{subsec:shmem_put_signal_nbi}. + \FUNC{shmem\_signal\_fetch} performs a fetch operation and returns the + contents of the \VAR{sig\_addr} signal data object. Access to + \VAR{sig\_addr} signal object at the calling \ac{PE} is expected to satisfy + the atomicity guarantees as described in Section~\ref{subsec:signal_atomicity}. } \apireturnvalues{ - Returns the contents of the symmetric data object, \VAR{sig\_addr}, at the + Returns the contents of the signal data object, \VAR{sig\_addr}, at the calling \ac{PE}. } diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex index d00947205..0825db9fc 100644 --- a/content/shmem_signal_wait_until.tex +++ b/content/shmem_signal_wait_until.tex @@ -1,6 +1,7 @@ \color{ForestGreen} \apisummary{ - Wait for a variable on the local \ac{PE} to change from a signaling operation. + Wait for a variable on the local \ac{PE} to change from a signaling + operation. } \begin{apidefinition} @@ -21,10 +22,10 @@ \apidescription{ \FUNC{shmem\_signal\_wait\_until} operation blocks until the value contained - in the symmetric data object, \VAR{sig\_addr}, at the calling \ac{PE} - satisfies the wait condition. 
In an \openshmem program with single-threaded - or multithreaded \acp{PE}, the \VAR{sig\_addr} object at the calling \ac{PE} - is expected only to be updated as a signal, through the signalling operations + in the signal data object, \VAR{sig\_addr}, at the calling \ac{PE} satisfies + the wait condition. In an \openshmem program with single-threaded or + multithreaded \acp{PE}, the \VAR{sig\_addr} object at the calling \ac{PE} is + expected only to be updated as a signal, through the signalling operations available in Section~\ref{subsec:shmem_put_signal} and Section~\ref{subsec:shmem_put_signal_nbi}. @@ -36,7 +37,7 @@ } \apireturnvalues{ - Return the contents of the symmetric data object, \VAR{sig\_addr}, at the + Return the contents of the signal data object, \VAR{sig\_addr}, at the calling \ac{PE} that satisfies the wait condition. } diff --git a/main_spec.tex b/main_spec.tex index 285997c69..9851ae47b 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -23,7 +23,7 @@ \section{Execution Model}\label{subsec:execution_model} \input{content/execution_model} \section{Language Bindings and Conformance}\label{subsec:bindings} -\input{content/language_bindings_and_conformance} +\input{content/language_bindings_and_conformance}re \section{Library Constants}\label{subsec:library_constants} \input{content/library_constants} @@ -252,7 +252,51 @@ \subsubsection{\textbf{SHMEM\_ATOMIC\_XOR}} -\subsection{Signalling Operations}\label{sec:shmem_signal} +\color{ForestGreen} +\subsection{Signaling Operations}\label{sec:shmem_signal} +This section specifies the OpenSHMEM support for \emph{put-with-signal}, +non-blocking \emph{put-with-signal}, and \emph{signal-fetch} routines. The +put-with-signal routines provide a method for copying data from a contiguous +local data object to a data object on a specified \ac{PE} and subsequently +updating a remote flag to signal completion. The signal-fetch routine provides +support for fetching a signal update operation. 
+ +\subsubsection{Atomicity Guarantees of Signalling Operations} +\label{subsec:signal_atomicity} +All signaling operations put-with-signal, non-blocking put-with-signal, and +signal-fetch are performed on a signal data object, a remotely accessible +symmetric object of type \VAR{uint64\_t}. A signal operator in the +put-with-signal routine is a \openshmem library constant that determines the +type of update to be performed as a signal on the signal data object. + +All signaling operations on the signal data object complete as if performed +atomically with respect to the following: +\begin{itemize} + \item other blocking or non-blocking variant of the put-with-signal routine + that updates the signal data object using the same signal update operator; + \item signal-fetch routine that fetches the signal data object; and + \item any point-to-point synchronization routine that accesses the signal + data object. +\end{itemize} + +\subsubsection{Available Signal Operators} +\label{subsec:signal_operator} + +With the atomicity guarantees as described in +Section~\ref{subsec:signal_atomicity}, the following options can be used as a +signal operator. + + \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_SET}}{An update to signal data + object is an atomic set operation. It writes an unsigned 64-bit value as a + signal into the signal data object on a remote \VAR{PE} as an atomic + operation.} + + \apitablerow{\LibConstRef{SHMEM\_SIGNAL\_ADD}}{An update to signal data + object is an atomic add operation. 
It adds an unsigned 64-bit value as a + signal into the signal data object on a remote \VAR{PE} as an atomic + operation.} + +\color{Black} \subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} \input{content/shmem_put_signal.tex} From 84c554453d2036d7951f682063516fde5530c6ca Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 8 Jul 2019 19:46:08 -0500 Subject: [PATCH 240/319] Minor spell-check fix --- content/shmem_signal_wait_until.tex | 2 +- main_spec.tex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex index 0825db9fc..81502a0b7 100644 --- a/content/shmem_signal_wait_until.tex +++ b/content/shmem_signal_wait_until.tex @@ -25,7 +25,7 @@ in the signal data object, \VAR{sig\_addr}, at the calling \ac{PE} satisfies the wait condition. In an \openshmem program with single-threaded or multithreaded \acp{PE}, the \VAR{sig\_addr} object at the calling \ac{PE} is - expected only to be updated as a signal, through the signalling operations + expected only to be updated as a signal, through the signaling operations available in Section~\ref{subsec:shmem_put_signal} and Section~\ref{subsec:shmem_put_signal_nbi}. diff --git a/main_spec.tex b/main_spec.tex index 9851ae47b..808e8ac07 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -261,7 +261,7 @@ \subsection{Signaling Operations}\label{sec:shmem_signal} updating a remote flag to signal completion. The signal-fetch routine provides support for fetching a signal update operation. 
-\subsubsection{Atomicity Guarantees of Signalling Operations} +\subsubsection{Atomicity Guarantees of Signaling Operations} \label{subsec:signal_atomicity} All signaling operations put-with-signal, non-blocking put-with-signal, and signal-fetch are performed on a signal data object, a remotely accessible From d082f0cdb43e42304360a377adb690171ed9203c Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 8 Jul 2019 21:02:06 -0500 Subject: [PATCH 241/319] Add put-with-signal in memory ordering routines --- content/shmem_fence.tex | 32 ++++++++++++++++++-------------- content/shmem_quiet.tex | 22 +++++++++++----------- main_spec.tex | 7 ++++--- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index 9583c07b7..2ca0a1df1 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -1,6 +1,7 @@ \apisummary{ - Assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines - to symmetric data objects. + Assures ordering of delivery of \PUT{}, \ac{AMO}, \emph{put-with-signal}, + memory store, nonblocking \PUT{}, and \emph{put-with-signal} + routines to symmetric data objects. } \begin{apidefinition} @@ -21,12 +22,15 @@ \end{apiarguments} \apidescription{ - This routine assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} - routines to symmetric data objects. All \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} - routines to symmetric data objects issued to a particular remote \ac{PE} + This routine assures ordering of delivery of \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, + and \emph{put-with-signal} + routines to symmetric data objects. 
All \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, + and \emph{put-with-signal} + routines to symmetric data objects issued to a particular remote \ac{PE} on the given context prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any - subsequent \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines to symmetric data + subsequent \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, and \emph{put-with-signal} + routines to symmetric data objects to the same \ac{PE}. \FUNC{shmem\_fence} guarantees order of delivery, not completion. It does not guarantee order of delivery of nonblocking \GET{} routines. If \VAR{ctx} has the value \CONST{SHMEM\_CTX\_INVALID}, no operation is @@ -39,24 +43,24 @@ \apinotes{ \FUNC{shmem\_fence} only provides per-\ac{PE} ordering guarantees and does not - guarantee completion of delivery. - \FUNC{shmem\_fence} also does not have an effect on the ordering between memory + guarantee completion of delivery. + \FUNC{shmem\_fence} also does not have an effect on the ordering between memory accesses issued by the target PE. \FUNC{shmem\_wait\_until}, \FUNC{shmem\_test}, - \FUNC{shmem\_barrier}, \FUNC{shmem\_barrier\_all} routines can be called by the target PE to guarantee + \FUNC{shmem\_barrier}, \FUNC{shmem\_barrier\_all} routines can be called by the target PE to guarantee ordering of its memory accesses. There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} - guarantees completion of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines to + guarantees completion of \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, and \emph{put-with-signal} routines to symmetric data objects which makes the updates visible to all other - \acp{PE}. - + \acp{PE}. 
+ The \FUNC{shmem\_quiet} routine should be called if completion of \PUT{}, - \ac{AMO}, memory store, and nonblocking \PUT{} routines to symmetric data objects is desired + \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, and \emph{put-with-signal} routines to symmetric data objects is desired when multiple remote \acp{PE} are involved. In an \openshmem program with multithreaded \acp{PE}, it is the user's responsibility to ensure ordering between operations issued by the threads - in a \ac{PE} that target symmetric memory (e.g. \PUT{}, \ac{AMO}, memory stores, + in a \ac{PE} that target symmetric memory (e.g. \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory stores, and nonblocking routines) and calls by threads in that \ac{PE} to \FUNC{shmem\_fence}. The \FUNC{shmem\_fence} routine can enforce memory store ordering only for the calling thread. Thus, to ensure ordering for memory stores performed by a thread that is diff --git a/content/shmem_quiet.tex b/content/shmem_quiet.tex index 98b52ecd8..ab3377a05 100644 --- a/content/shmem_quiet.tex +++ b/content/shmem_quiet.tex @@ -1,6 +1,6 @@ \apisummary{ - Waits for completion of all outstanding \PUT{}, \ac{AMO}, memory store, - and nonblocking \PUT{} and \GET{} routines to symmetric data + Waits for completion of all outstanding \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, + and nonblocking \PUT{}, \emph{put-with-signal} and \GET{} routines to symmetric data objects issued by a \ac{PE}. } @@ -21,13 +21,13 @@ the default context.} \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_quiet} routine ensures completion of \PUT{}, \ac{AMO}, - memory store, and nonblocking \PUT{} and \GET{} routines on + \emph{put-with-signal}, memory store, and nonblocking \PUT{},\emph{put-with-signal}, and \GET{} routines on symmetric data objects issued by the calling \ac{PE} on the given context. 
All \PUT{}, \ac{AMO}, - memory store, and nonblocking \PUT{} and \GET{} routines to + \emph{put-with-signal}, memory store, and nonblocking \PUT{}, \emph{put-with-signal}, and \GET{} routines to symmetric data objects are guaranteed to be completed and visible to all - \acp{PE} when \FUNC{shmem\_quiet} returns. + \acp{PE} when \FUNC{shmem\_quiet} returns. If \VAR{ctx} has the value \CONST{SHMEM\_CTX\_INVALID}, no operation is performed. } @@ -37,22 +37,22 @@ None. } -\apinotes{ +\apinotes{ \FUNC{shmem\_quiet} is most useful as a way of ensuring completion of - several \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} - and \GET{} routines to symmetric data objects initiated by the calling + several \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{}, + \emph{put-with-signal}, and \GET{} routines to symmetric data objects initiated by the calling \ac{PE}. For example, one might use \FUNC{shmem\_quiet} to await delivery of a block of data before issuing another \PUT{} or nonblocking \PUT{} routine, which sets a completion flag on another \ac{PE}. \FUNC{shmem\_quiet} is not usually needed if \FUNC{shmem\_barrier\_all} or \FUNC{shmem\_barrier} are called. The barrier routines wait for the completion of outstanding writes (\PUT{}, \ac{AMO}, - memory stores, and nonblocking \PUT{} and \GET{} routines) to + \emph{put-with-signal}, memory stores, and nonblocking \PUT{},\emph{put-with-signal}, and \GET{} routines) to symmetric data objects on all \acp{PE}. In an \openshmem program with multithreaded \acp{PE}, it is the user's responsibility to ensure ordering between operations issued by the threads - in a \ac{PE} that target symmetric memory (e.g. \PUT{}, \ac{AMO}, memory stores, + in a \ac{PE} that target symmetric memory (e.g. \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory stores, and nonblocking routines) and calls by threads in that \ac{PE} to \FUNC{shmem\_quiet}. 
The \FUNC{shmem\_quiet} routine can enforce memory store ordering only for the calling thread. Thus, to ensure ordering for memory stores performed by a thread that is diff --git a/main_spec.tex b/main_spec.tex index 808e8ac07..b3e201b23 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -261,7 +261,7 @@ \subsection{Signaling Operations}\label{sec:shmem_signal} updating a remote flag to signal completion. The signal-fetch routine provides support for fetching a signal update operation. -\subsubsection{Atomicity Guarantees of Signaling Operations} +\subsubsection{Atomicity Guarantees for Signaling Operations} \label{subsec:signal_atomicity} All signaling operations put-with-signal, non-blocking put-with-signal, and signal-fetch are performed on a signal data object, a remotely accessible @@ -379,8 +379,9 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_WAIT\_UNTIL}}\label{subsec:shmem_signal_wa \subsection{Memory Ordering Routines}\label{subsec:memory_order} The following section discusses \openshmem \acp{API} that provide mechanisms to -ensure ordering and/or delivery of \OPR{Put}, \ac{AMO}, memory store, -and non-blocking \PUT{} and \GET{} routines to symmetric data objects. +ensure ordering and/or delivery of \OPR{Put}, \ac{AMO}, \emph{put-with-signal}, +memory store, and non-blocking \PUT{}, \emph{put-with-signal}, and \GET{} +routines to symmetric data objects. \subsubsection{\textbf{SHMEM\_FENCE}}\label{subsec:shmem_fence} \input{content/shmem_fence.tex} From 99514f1f7e53d9d00f553ed46c9689563de45b2e Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 24 Jul 2019 06:43:07 -0400 Subject: [PATCH 242/319] Const qualify config args in team_split_2d; mark as 'IN' args closes #135 --- content/shmem_team_split_2d.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index bfed1b43d..d9c6c4290 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -8,8 +8,8 @@ \begin{Csynopsis} int @\FuncDecl{shmem\_team\_split\_2d}@(shmem_team_t parent_team, int xrange, - shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t *xaxis_team, - shmem_team_config_t *yaxis_config, long yaxis_mask, shmem_team_t *yaxis_team); + const shmem_team_config_t *xaxis_config, long xaxis_mask, shmem_team_t *xaxis_team, + const shmem_team_config_t *yaxis_config, long yaxis_mask, shmem_team_t *yaxis_team); \end{Csynopsis} \begin{apiarguments} @@ -19,7 +19,7 @@ \apiargument{IN}{xrange}{A nonnegative integer representing the number of elements in the first dimension.} -\apiargument{INOUT}{xaxis\_config}{ +\apiargument{IN}{xaxis\_config}{ A pointer to the configuration parameters for the new \VAR{x}-axis team.} \apiargument{IN}{xaxis\_mask}{ @@ -30,7 +30,7 @@ subset consisting of all the \acp{PE} that have the same coordinate along the \VAR{x}-axis as the calling \ac{PE}.} -\apiargument{INOUT}{yaxis\_config}{ +\apiargument{IN}{yaxis\_config}{ A pointer to the configuration parameters for the new \VAR{y}-axis team.} \apiargument{IN}{yaxis\_mask}{ From 6bd587bfdbd590fc46bfd04d7b160b814c327e8f Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 24 Jul 2019 07:51:39 -0400 Subject: [PATCH 243/319] Formatting and typo fixes in split_2d --- content/shmem_team_split_2d.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index d9c6c4290..32d125291 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -65,8 +65,8 @@ The \acp{PE} are numbered in the new teams based on the coordinate of the \ac{PE} along the given axis. So, another way to think of the result of the split -operation is that the value returned by \FUNC{shmem\_team\_my\_pe}(\VAR(xteam)) is the -x-coordinate and the value returned by \FUNC{shmem\_team\_my\_pe}(\VAR(yteam)) +operation is that the value returned by \FUNC{shmem\_team\_my\_pe(\VAR{xteam})} is the +x-coordinate and the value returned by \FUNC{shmem\_team\_my\_pe(\VAR{yteam})} is the y-coordinate of the calling \ac{PE}. Any valid \openshmem team can be used as the parent team. This routine must be @@ -82,7 +82,7 @@ The \acp{PE} in the parent team \emph{do not} have to all provide the same parameters for new teams. -The \VAR{xaxis\_mask} and\VAR{xaxis\_mask} arguments are a bitwise masks +The \VAR{xaxis\_mask} and \VAR{yaxis\_mask} arguments are bitwise masks representing the set of configuration parameters to use from \VAR{xaxis\_config} and \VAR{yaxis\_config}, respectively. A mask value of \CONST{0} indicates that the team From 2f437c36d3dc0f2e4adfac6ab17c6ea897a3afd8 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 24 Jul 2019 07:53:49 -0400 Subject: [PATCH 244/319] Make team_split_strided collective across parent closes #137 --- content/shmem_team_split_strided.tex | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index bfbc75354..6355dc950 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -6,8 +6,8 @@ \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, int start, int stride, - int size, const shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); +int @\FuncDecl{shmem\_team\_split\_strided}@(shmem_team_t parent_team, int start, int stride, int size, + const shmem_team_config_t *config, long config_mask, shmem_team_t *new_team); \end{Csynopsis} \begin{apiarguments} @@ -51,13 +51,18 @@ \end{equation*} where $N$ is the number of \acp{PE} in the parent team. -This routine must be called by all \acp{PE} contained in the \ac{PE} triplet -specification. It may be called by additional \acp{PE} not included in the -triplet specification, but for those \acp{PE} a \VAR{new\_team} value of -\LibConstRef{SHMEM\_TEAM\_INVALID} is returned. All \acp{PE} must provide the -same values for the \ac{PE} triplet. This routine will return a \VAR{new\_team} -containing the \ac{PE} subset specified by the triplet and ordered by the -existing global \ac{PE} number. +This routine must be called by all \acp{PE} in the parent team. +All \acp{PE} must provide the same values for the \ac{PE} triplet. +This routine will return a \VAR{new\_team} containing the \ac{PE} +subset specified by the triplet and ordered by the existing global +\ac{PE} number. + +On successful creation of the new team, the \VAR{new\_team} handle +will reference a valid team for the subset of \acp{PE} in the parent +team specified by the triplet. 
+Those \acp{PE} in the parent team that are not in the subset specified +by the triplet will have \VAR{new\_team} assigned to +\LibConstRef{SHMEM\_TEAM\_INVALID}. The \VAR{config} argument specifies team configuration parameters, which are described in Section~\ref{subsec:shmem_team_config_t}. @@ -74,7 +79,8 @@ will be created and \VAR{new\_team} will be assigned the value \LibConstRef{SHMEM\_TEAM\_INVALID}; otherwise, the behavior is undefined. -If an invalid \ac{PE} triplet is provided, then the \VAR{new\_team} will not be created. +If an invalid \ac{PE} triplet is provided, then the \VAR{new\_team} +will not be created. If \VAR{new\_team} cannot be created, then it will be assigned the value \LibConstRef{SHMEM\_TEAM\_INVALID}. From 9cee6892b64ad9bab86167a890b2426dc8bae3ca Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 24 Jul 2019 08:01:32 -0400 Subject: [PATCH 245/319] Consolidate deprecation rationale for shmem_barrier closes #138 --- content/shmem_barrier.tex | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/content/shmem_barrier.tex b/content/shmem_barrier.tex index c9ec30512..275f6e71b 100644 --- a/content/shmem_barrier.tex +++ b/content/shmem_barrier.tex @@ -58,12 +58,6 @@ The same \VAR{pSync} array may be reused on consecutive calls to \FUNC{shmem\_barrier} if the same active set is used. -{\color{Green} - \FUNC{shmem\_barrier} has been deprecated in favor of the equivalent - call to \FUNC{shmem\_quiet} followed by a call to - \FUNC{shmem\_sync} on a team or active set with the desired - set of \acp{PE}. -} } \apireturnvalues{ @@ -71,6 +65,16 @@ } \apinotes{ + \newtext{ + As of \openshmem[1.5], \FUNC{shmem\_barrier} has been deprecated. + No team-based barrier is provided by \openshmem, as a team may have any + number of communication contexts associated with the team. 
+ Applications seeking such an idiom should call + \FUNC{shmem\_ctx\_quiet} on the desired communication context, + followed by a call to \FUNC{shmem\_team\_sync} on the desired + team. + } + If the \VAR{pSync} array is initialized at the run time, all \acp{PE} must be synchronized before the first call to \FUNC{shmem\_barrier} (e.g., by \FUNC{shmem\_barrier\_all}) to ensure the array has been initialized @@ -87,14 +91,6 @@ Calls to \FUNC{shmem\_ctx\_quiet} can be performed prior to calling the barrier routine to ensure completion of operations issued on additional contexts. - - \newtext{ - No team-based barrier is provided by \openshmem, as a team may have any - number of communication contexts associated with the team. - Applications seeking such an idiom should call \FUNC{shmem\_ctx\_quiet} - on the desired context, followed by a call to \FUNC{shmem\_team\_sync} - on the desired team. - } } \begin{apiexamples} From 360acea2e0ccaffa53bd3e588af3d34710ba4eaa Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 24 Jul 2019 08:11:40 -0400 Subject: [PATCH 246/319] Fixup shmem_team_translate -> shmem_team_translate_pe in example closes #108 --- example_code/shmem_team_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example_code/shmem_team_context.c b/example_code/shmem_team_context.c index 343417fec..ab7dd12e6 100644 --- a/example_code/shmem_team_context.c +++ b/example_code/shmem_team_context.c @@ -15,7 +15,7 @@ int my_ctx_translate_pe(shmem_ctx_t src_ctx, int src_pe, shmem_ctx_t dest_ctx) shmem_team_t src_team, dest_team; shmem_ctx_get_team(src_ctx, &src_team); shmem_ctx_get_team(dest_ctx, &dest_team); - return shmem_team_translate(src_team, src_pe, dest_pe); + return shmem_team_translate_pe(src_team, src_pe, dest_team); } shmem_ctx_t my_team_create_ctx(shmem_team_t team) { From 02d31e15b06bc0410bb02c08ff3fb5ec1b51a879 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 24 Jul 2019 08:20:17 -0400 Subject: [PATCH 247/319] split_2d: remove note about parameters and symmetric memory --- content/shmem_team_split_2d.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index 32d125291..50822c037 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -72,7 +72,7 @@ Any valid \openshmem team can be used as the parent team. This routine must be called by all \acp{PE} in the parent team. The value of \VAR{xrange} must be nonnegative and all \acp{PE} in the parent team must pass the same value for -\VAR{xrange}. None of the parameters need to reside in symmetric memory. +\VAR{xrange}. The \VAR{xaxis\_config} and \VAR{yaxis\_config} arguments specify team configuration parameters for the \VAR{x}- and \VAR{y}-axis teams, respectively. From f65edc64b338c713c3474c8070a2a7d5a4c1cc19 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 24 Jul 2019 08:53:01 -0400 Subject: [PATCH 248/319] Minor formatting/editorial fixes --- content/shmem_alltoall.tex | 2 +- content/shmem_alltoalls.tex | 6 +++--- content/shmem_broadcast.tex | 2 +- content/shmem_collect.tex | 2 +- content/shmem_reductions.tex | 2 +- content/shmem_sync.tex | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index f6cd5c576..6719ad779 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -166,7 +166,7 @@ \apinotes{ \newtext{% There are no specifically defined error codes for these routines. - See section \ref{subsec:error_handling} for expected error checking and + See Section~\ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. For portable error checking and debugging behavior, programs should do their own checks for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID}. 
diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index ca2a3c5c0..053f79587 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -114,8 +114,8 @@ \ac{PE} \VAR{j}. {\color{Green} - See the description of \FUNC{shmem\_alltoall} in section - \ref{subsec:shmem_alltoall} for: + See the description of \FUNC{shmem\_alltoall} in + Section~\ref{subsec:shmem_alltoall} for: \begin{itemize} \item Data element sizes for the different sized and typed \FUNC{shmem\_alltoalls} variants. \item Rules for \ac{PE} participation in the collective routine. @@ -132,7 +132,7 @@ } \apinotes{ - \newtext{See notes for \FUNC{shmem\_alltoall} in section \ref{subsec:shmem_alltoall}}. + \newtext{See notes for \FUNC{shmem\_alltoall} in Section~\ref{subsec:shmem_alltoall}}. } \begin{apiexamples} diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index d73171003..61ef8d8fa 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -161,7 +161,7 @@ \apinotes{ \newtext{% There are no specifically defined error codes for these routines. - See section \ref{subsec:error_handling} for expected error checking and + See Section~\ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. For portable error checking and debugging behavior, programs should do their own checks for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 209099fe0..08910ce7a 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -160,7 +160,7 @@ \apinotes{ \newtext{% There are no specifically defined error codes for these routines. - See section \ref{subsec:error_handling} for expected error checking and + See Section~\ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. 
For portable error checking and debugging behavior, programs should do their own checks for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID}. diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 3c5b294cd..d4e69311c 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -437,7 +437,7 @@ \apinotes{ \newtext{% There are no specifically defined error codes for this routine. - See section \ref{subsec:error_handling} for expected error checking and + See Section~\ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. For portable error checking and debugging behavior, programs should do their own checks for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 633b44ef5..37f46ba1d 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -51,7 +51,7 @@ {\color{Green} The routine registers the arrival of a \ac{PE} at a synchronization point in the program. This is a fast mechanism for synchronizing all \acp{PE} that participate in this - collective call. The routine blocks the calling \ac{PE} until all \ac{PE} in the + collective call. The routine blocks the calling \ac{PE} until all \acp{PE} in the specified team or active set have called \FUNC{shmem\_sync}. In a multithreaded \openshmem program, only the calling thread is blocked. @@ -90,7 +90,7 @@ \newtext{% There are no specifically defined error codes for sync operations. - See section \ref{subsec:error_handling} for expected error checking and + See Section~\ref{subsec:error_handling} for expected error checking and return code behavior specific to implementations. For portable error checking and debugging behavior, programs should do their own checks for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} From 86c3333961a7215bb91987322004e58cd97f4487 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 24 Jul 2019 09:07:23 -0400 Subject: [PATCH 249/319] Un-deprecate shmem_sync_all; update deprecation annex This commit un-deprecates shmem_sync_all. Additionally, the shmem_barrier_all function was un-deprecated in 41fa466, but the associated changes to the deprecation annex were missed. This commit removes shmem_{barrier,sync}_all from the deprecation annex. --- content/backmatter.tex | 18 ------------------ content/shmem_sync_all.tex | 2 -- 2 files changed, 20 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 304914e91..b973b4813 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -364,12 +364,6 @@ \section{Overview}\label{subsec:dep_overview} \color{Green} \CorCpp: \FuncRef{shmem\_barrier} & 1.5 & Current & \FUNC{shmem\_quiet}; \FUNC{shmem\_team\_sync} \\ \hline - \color{Green} - \CorCpp: \FuncRef{shmem\_barrier\_all} & 1.5 & Current & - \FUNC{shmem\_quiet}; \FUNC{shmem\_team\_sync}(\LibConstRef{SHMEM\_TEAM\_WORLD}) \\ \hline - \color{Green} - \CorCpp: \FuncRef{shmem\_sync\_all} & 1.5 & Current & - \FUNC{shmem\_team\_sync}(\LibConstRef{SHMEM\_TEAM\_WORLD}) \\ \hline \end{longtable} \end{center} @@ -538,18 +532,6 @@ \subsection{\CorCpp: \FUNC{shmem\_barrier}} followed by a call to \FUNC{shmem\_sync} in order to explicitly indicate which context to quiesce. -\subsection{\CorCpp: \FUNC{shmem\_barrier\_all}, \FUNC{shmem\_sync\_all}} -With the addition of \openshmem teams combined, the notion of all \acp{PE} has -been encapsulated as \LibConstRef{SHMEM\_TEAM\_WORLD}. The previous -method of specifying active sets to \FUNC{shmem\_barrier} and \FUNC{shmem\_sync} -was verbose. So, shorthand versions of these functions were helpful both in -readability and ability to improve performance by not requiring tests of -active set parameters to determine that the routine involved all \acp{PE}. -With teams, becomes readable in a program to simply call a barrier or sync -on \LibConstRef{SHMEM\_TEAM\_WORLD}. 
Implementations need only test one constant -to determine that the operation involves all \acp{PE}. -} - \chapter{Changes to this Document}\label{sec:changelog} \section{Version 1.5} diff --git a/content/shmem_sync_all.tex b/content/shmem_sync_all.tex index 6179796c9..bd192a660 100644 --- a/content/shmem_sync_all.tex +++ b/content/shmem_sync_all.tex @@ -1,4 +1,3 @@ -\begin{DeprecateBlock} \apisummary{ \newtext{Performs all operations described in the \FUNC{shmem\_sync} interface but implicitly operates on \LibConstRef{SHMEM\_TEAM\_WORLD}.} @@ -41,4 +40,3 @@ } \end{apidefinition} -\end{DeprecateBlock} From 46735a183cd0968b9e171e6e1782f5fb387cfc14 Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 24 Jul 2019 10:08:12 -0400 Subject: [PATCH 250/319] Fix change highlighting for shmem_sync[_all] --- content/shmem_sync.tex | 16 +++++++++++++--- content/shmem_sync_all.tex | 37 ++++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 37f46ba1d..7f8c25fd9 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,7 +1,11 @@ \apisummary{ \newtext{Registers the arrival of a \ac{PE} at a synchronization point and suspends \ac{PE} execution until all other \acp{PE} in a given \openshmem team or active set - arrive at the same synchronization point.} + arrive at a synchronization point.} + \oldtext{% + Performs all operations described in the \FUNC{shmem\_sync\_all} interface + but with respect to a subset of \acp{PE} defined by the active set. + } } \begin{apidefinition} @@ -45,8 +49,13 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_sync} is a collective synchronization routine over - \newtext{an existing \openshmem team or} an active set + \FUNC{shmem\_sync} is a collective synchronization routine over an + \newtext{existing \openshmem team or} active set. 
+ \oldtext{% + Control returns from \FUNC{shmem\_sync} after all \acp{PE} in + the active set (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and + \VAR{PE\_size}) have called \FUNC{shmem\_sync}. + } {\color{Green} The routine registers the arrival of a \ac{PE} at a synchronization point in the program. @@ -84,6 +93,7 @@ \apireturnvalues{ \newtext{Zero on successful local completion. Nonzero otherwise.} + \oldtext{None.} } \apinotes{ diff --git a/content/shmem_sync_all.tex b/content/shmem_sync_all.tex index bd192a660..57a63e1d9 100644 --- a/content/shmem_sync_all.tex +++ b/content/shmem_sync_all.tex @@ -1,6 +1,6 @@ \apisummary{ - \newtext{Performs all operations described in the \FUNC{shmem\_sync} interface - but implicitly operates on \LibConstRef{SHMEM\_TEAM\_WORLD}.} + Registers the arrival of a \ac{PE} at a \newtext{synchronization point} \oldtext{barrier} and suspends \ac{PE} + execution until all other \acp{PE} \newtext{in the default team} arrive at \newtext{a synchronization point} \oldtext{the barrier}. } \begin{apidefinition} @@ -16,19 +16,24 @@ \end{apiarguments} \apidescription{ -{\color{Green} - This routine blocks the \ac{PE} until all \acp{PE} in the \openshmem - program have called \FUNC{shmem\_sync\_all}. In a multithreaded \openshmem - program, only the calling thread is blocked. + + \newtext{% + This routine blocks the calling \ac{PE} until all \acp{PE} in the + default team have called \FUNC{shmem\_sync\_all}. + } + \oldtext{% + The \FUNC{shmem\_sync\_all} routine registers the arrival of a \ac{PE} at a + barrier. Barriers are a fast mechanism for synchronizing all \acp{PE} at + once. This routine blocks the \ac{PE} until all \acp{PE} have called + \FUNC{shmem\_sync\_all}. + } + In a multithreaded \openshmem program, only the calling thread is + blocked. 
In contrast with the \FUNC{shmem\_barrier\_all} routine, \FUNC{shmem\_sync\_all} only ensures completion and visibility of previously issued memory stores and does not ensure completion of remote memory updates issued via \openshmem routines. - - The \FUNC{shmem\_sync\_all} routine is deprecated in favor of the equivalent call to - \FUNC{shmem\_sync(SHMEM\_TEAM\_WORLD)}. -} } \apireturnvalues{ @@ -36,7 +41,17 @@ } \apinotes{ - None. + \newtext{% + The \FUNC{shmem\_sync\_all} routine is equivalent to calling + \FUNC{shmem\_team\_sync} on the default team. + } + \oldtext{% + The \FUNC{shmem\_sync\_all} routine can be used to portably ensure that + memory access operations observe remote updates in the order enforced by the + initiator \acp{PE}, provided that the initiator PE ensures completion of remote + updates with a call to \FUNC{shmem\_quiet} prior to the call to the + \FUNC{shmem\_sync\_all} routine. + } } \end{apidefinition} From 8f157554b467bad6d41e253fa20a1f26958d47bd Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 24 Jul 2019 10:29:14 -0400 Subject: [PATCH 251/319] Add "default team" reference to symmetric (de|re)allocation routines --- content/shmem_calloc.tex | 3 ++- content/shmem_malloc.tex | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/content/shmem_calloc.tex b/content/shmem_calloc.tex index a29eb2d9f..235e552d5 100644 --- a/content/shmem_calloc.tex +++ b/content/shmem_calloc.tex @@ -15,7 +15,8 @@ \apidescription{ - The \FUNC{shmem\_calloc} routine is a collective operation that allocates a + The \FUNC{shmem\_calloc} routine is a collective operation + \newtext{on the default team} that allocates a region of remotely-accessible memory for an array of \VAR{count} objects of \VAR{size} bytes each and returns a pointer to the lowest byte address of the allocated symmetric diff --git a/content/shmem_malloc.tex b/content/shmem_malloc.tex index c7fe958a1..26ba59ec7 100644 --- a/content/shmem_malloc.tex +++ b/content/shmem_malloc.tex @@ -23,7 +23,7 @@ \apidescription{ The \FUNC{shmem\_malloc}, \FUNC{shmem\_free}, \FUNC{shmem\_realloc}, and \FUNC{shmem\_align} routines are collective operations that require - participation by all \acp{PE}. + participation by all \acp{PE} \newtext{in the default team}. The \FUNC{shmem\_malloc} routine returns a pointer to a block of at least \VAR{size} bytes, which shall be suitably aligned so that it may be From b8e46a8c5e3a3ea0ddd937e04d5c741402fb7422 Mon Sep 17 00:00:00 2001 From: "Nicholas S. 
Park" Date: Wed, 24 Jul 2019 10:48:52 -0400 Subject: [PATCH 252/319] Editorial clarifications in teams intro --- content/teams_intro.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/teams_intro.tex b/content/teams_intro.tex index baea3d152..4bb06d5c2 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -1,11 +1,11 @@ The \acp{PE} in an \openshmem program communicate using either -\ac{RMA} and \ac{AMO} routines, which specify the \ac{PE} number of the target +point-to-point routines---such as \ac{RMA} and \ac{AMO} routines---which specify the \ac{PE} number of the target \ac{PE}, or collective routines, which operate over a set of \acp{PE}. In \openshmem, teams allow programs to group a set of \acp{PE} for communication. Team-based collective communications operate across all the \acp{PE} in a valid team. -\ac{RMA} and \ac{AMO} communication can make use of team-relative \ac{PE} +Point-to-point communication can make use of team-relative \ac{PE} numbering through team-based contexts (see Section~\ref{sec:ctx}) or \ac{PE} number translation. @@ -27,7 +27,7 @@ \subsubsection*{Team Handles} A ``team handle'' is an opaque object with type \CTYPE{shmem\_team\_t} that is used to reference a team. -Team handles are not remotely accessible objects +Team handles are not remotely accessible objects. The predefined teams may be accessed via the team handles listed in Section~\ref{subsec:library_handles}. 
From 58d79007c28983fdf52188e2250c068c38f83463 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 5 Aug 2019 13:37:46 -0400 Subject: [PATCH 253/319] Revised shmem_wait advice to implementors Signed-off-by: James Dinan --- content/shmem_wait_until.tex | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex index ad2d8e9b8..d95e57937 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -92,12 +92,11 @@ \apiimpnotes{ Some platforms may allow wait operations to efficiently poll or block on an - update to \VAR{ivar}. On others, an atomic-compatible read operation may be - needed to preserve the atomicity of updates to \VAR{ivar}. On platforms - where atomic-compatible read operations negatively impact performance, - implementations may be able to limit the number of atomic-compatible reads - performed by using non-atomic reads of \VAR{ivar} to wait for a change to - occur, followed by an atomic-compatible read operation to fetch the updated + update to \VAR{ivar}. On others, an atomic read operation may be needed to + observe updates to \VAR{ivar}. On platforms where atomic read operations + incur high overhead, implementations may be able to reduce the number of + atomic reads performed by using non-atomic reads of \VAR{ivar} to wait for a + change to occur, followed by an atomic read operation to fetch the updated value. 
} From 4b13219087b23142e3178ce1c480d63b6a08d9aa Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 5 Aug 2019 13:56:08 -0400 Subject: [PATCH 254/319] Revert Add AMO text to wait/test multiple APIs Signed-off-by: James Dinan --- content/shmem_test_all.tex | 14 ++++++-------- content/shmem_test_any.tex | 22 ++++++++++------------ content/shmem_test_some.tex | 20 +++++++++----------- content/shmem_wait_until_all.tex | 14 ++++++-------- content/shmem_wait_until_any.tex | 8 +++----- content/shmem_wait_until_some.tex | 8 +++----- 6 files changed, 37 insertions(+), 49 deletions(-) diff --git a/content/shmem_test_all.tex b/content/shmem_test_all.tex index ee044f0f3..fe2c98891 100644 --- a/content/shmem_test_all.tex +++ b/content/shmem_test_all.tex @@ -34,14 +34,12 @@ \apidescription{ The \FUNC{shmem\_test\_all} routine indicates whether all entries in the test set specified by \VAR{ivars} and \VAR{status} have satisfied the test - condition at the calling \ac{PE}. The \VAR{ivar} object at the calling - \ac{PE} may be updated by an \ac{AMO} performed by a thread located within - the calling \ac{PE} or within another \ac{PE}. This routine does not block - and returns zero if not all entries in \VAR{ivars} satisfied the test - condition. This routine compares each of the \VAR{nelems} elements in the - \VAR{ivars} array with the value \VAR{cmp\_value} according to the - comparison operator \VAR{cmp} at the calling \ac{PE}. If \VAR{nelems} is - 0, the test set is empty and this routine returns 1. + condition at the calling \ac{PE}. This routine does not block and returns zero if + not all entries in \VAR{ivars} satisfied the test condition. This routine + compares each of the \VAR{nelems} elements in the \VAR{ivars} array with + the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} + at the calling \ac{PE}. + If \VAR{nelems} is 0, the test set is empty and this routine returns 1. 
The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates whether diff --git a/content/shmem_test_any.tex b/content/shmem_test_any.tex index 217c30498..9b1c05ae4 100644 --- a/content/shmem_test_any.tex +++ b/content/shmem_test_any.tex @@ -33,18 +33,16 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_test\_any} routine indicates whether any entry in the test - set specified by \VAR{ivars} and \VAR{status} has satisfied the test - condition at the calling \ac{PE}. The \VAR{ivar} object at the calling - \ac{PE} may be updated by an \ac{AMO} performed by a thread located within - the calling \ac{PE} or within another \ac{PE}. This routine does not block - and returns \CONST{SIZE\_MAX} if no entries in \VAR{ivars} satisfied the - test condition. This routine compares each of the \VAR{nelems} elements in - the \VAR{ivars} array with the value \VAR{cmp\_value} according to the - comparison operator \VAR{cmp} at the calling \ac{PE}. The order in which - these elements are tested is unspecified. If an entry $i$ in \VAR{ivars} - within the test set satisfies the test condition, a series of calls to - \FUNC{shmem\_test\_any} must eventually return $i$. + The \FUNC{shmem\_test\_any} routine indicates whether any entry in the + test set specified by \VAR{ivars} and \VAR{status} has satisfied the test + condition at the calling \ac{PE}. This routine does not block and returns \CONST{SIZE\_MAX} if + no entries in \VAR{ivars} satisfied the test condition. This routine + compares each of the \VAR{nelems} elements in the \VAR{ivars} array with + the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} + at the calling \ac{PE}. The order in which these elements are tested is + unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies + the test condition, a series of calls to \FUNC{shmem\_test\_any} must + eventually return $i$. 
The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates whether diff --git a/content/shmem_test_some.tex b/content/shmem_test_some.tex index 4f26d6c58..b1ab8b4d5 100644 --- a/content/shmem_test_some.tex +++ b/content/shmem_test_some.tex @@ -37,17 +37,15 @@ \apidescription{ The \FUNC{shmem\_test\_some} routine indicates whether at least one entry in the test set specified by \VAR{ivars} and \VAR{status} satisfies the - test condition at the calling \ac{PE}. The \VAR{ivar} object at the - calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located - within the calling \ac{PE} or within another \ac{PE}. This routine does - not block and returns zero if no entries in \VAR{ivars} satisfied the test - condition. This routine compares each element of the \VAR{ivars} array in - the test set with the value \VAR{cmp\_value} according to the comparison - operator \VAR{cmp} at the calling \ac{PE}. This routine tests all elements - of \VAR{ivars} in the test set at least once, and the order in which the - elements are tested is unspecified. If an entry $i$ in \VAR{ivars} within - the test set satisfies the test condition, a series of calls to - \FUNC{shmem\_test\_some} must eventually return $i$. + test condition at the calling \ac{PE}. This routine does not block and returns zero if + no entries in \VAR{ivars} satisfied the test condition. This routine + compares each element of the \VAR{ivars} array in the test set with the + value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} at + the calling \ac{PE}. This routine tests all elements of \VAR{ivars} in the + test set at least once, and the order in which the elements are tested is + unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies + the test condition, a series of calls to \FUNC{shmem\_test\_some} must + eventually return $i$. 
Upon return, the \VAR{indices} array contains the indices of the elements in the test set that satisfied the test condition during the call to diff --git a/content/shmem_wait_until_all.tex b/content/shmem_wait_until_all.tex index ff1b529b5..1a4be4f52 100644 --- a/content/shmem_wait_until_all.tex +++ b/content/shmem_wait_until_all.tex @@ -33,14 +33,12 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_all} routine waits until all entries in the - wait set specified by \VAR{ivars} and \VAR{status} have satisfied the wait - condition at the calling \ac{PE}. The \VAR{ivar} object at the calling - \ac{PE} may be updated by an \ac{AMO} performed by a thread located within - the calling \ac{PE} or within another \ac{PE}. If \VAR{nelems} is 0, the - wait set is empty and this routine returns immediately. This routine is - semantically similar to \FUNC{shmem\_wait\_until} in - Section~\ref{subsec:shmem_wait_until}, but adds support for point-to-point - synchronization involving an array of symmetric data objects. + wait set specified by \VAR{ivars} and \VAR{status} have satisfied the wait condition at the + calling \ac{PE}. If \VAR{nelems} is 0, the wait set is empty and this routine returns immediately. + This routine is semantically similar to + \FUNC{shmem\_wait\_until} in Section~\ref{subsec:shmem_wait_until}, but + adds support for point-to-point synchronization involving an array of + symmetric data objects. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates diff --git a/content/shmem_wait_until_any.tex b/content/shmem_wait_until_any.tex index 063978bbe..97eb7a1c4 100644 --- a/content/shmem_wait_until_any.tex +++ b/content/shmem_wait_until_any.tex @@ -35,11 +35,9 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_any} routine waits until any one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait - condition at the calling \ac{PE}. 
The \VAR{ivar} object at the calling - \ac{PE} may be updated by an \ac{AMO} performed by a thread located within - the calling \ac{PE} or within another \ac{PE}. The order in which these - elements are waited upon is unspecified. If an entry $i$ in \VAR{ivars} - within the wait set satisfies the wait condition, a series of calls to + condition at the calling \ac{PE}. The order in which these elements are + waited upon is unspecified. If an entry $i$ in \VAR{ivars} within the wait + set satisfies the wait condition, a series of calls to \FUNC{shmem\_wait\_until\_any} must eventually return $i$. The optional \VAR{status} is a mask array of length \VAR{nelems} where each diff --git a/content/shmem_wait_until_some.tex b/content/shmem_wait_until_some.tex index bfbf76f8c..f18e250be 100644 --- a/content/shmem_wait_until_some.tex +++ b/content/shmem_wait_until_some.tex @@ -37,11 +37,9 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_some} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the - wait condition at the calling \ac{PE}. The \VAR{ivar} object at the - calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located - within the calling \ac{PE} or within another \ac{PE}. This routine tests - all elements of \VAR{ivars} in the wait set at least once, and the order in - which the elements are waited upon is unspecified. + wait condition at the calling \ac{PE}. This routine tests all elements of + \VAR{ivars} in the wait set at least once, and the order in which the + elements are waited upon is unspecified. 
Upon return, the \VAR{indices} array contains the indices of at least one element in the wait set that satisfied the wait condition during the call From 5e957a744ad0291751d00c6a2c88314c360f6525 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 5 Aug 2019 14:07:51 -0400 Subject: [PATCH 255/319] Fix read fence text in wait/test multiple APIs Signed-off-by: James Dinan --- content/shmem_test_all.tex | 2 +- content/shmem_test_any.tex | 5 +++-- content/shmem_test_some.tex | 5 +++-- content/shmem_wait_until_all.tex | 2 +- content/shmem_wait_until_any.tex | 2 +- content/shmem_wait_until_some.tex | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/content/shmem_test_all.tex b/content/shmem_test_all.tex index fe2c98891..62cc709ec 100644 --- a/content/shmem_test_all.tex +++ b/content/shmem_test_all.tex @@ -52,7 +52,7 @@ memory. Implementations must ensure that \FUNC{shmem\_test\_all} does not return 1 - before the update of the memory indicated by \VAR{ivar} is fully complete. + before the update of the memory indicated by \VAR{ivars} is fully complete. } \apireturnvalues{ diff --git a/content/shmem_test_any.tex b/content/shmem_test_any.tex index 9b1c05ae4..01b4a1d63 100644 --- a/content/shmem_test_any.tex +++ b/content/shmem_test_any.tex @@ -54,8 +54,9 @@ elements in \VAR{ivars} are included in the test set. The \VAR{ivars} and \VAR{status} arrays must not overlap in memory. - Implementations must ensure that \FUNC{shmem\_test\_any} does not return 1 - before the update of the memory indicated by \VAR{ivar} is fully complete. + Implementations must ensure that \FUNC{shmem\_test\_any} does not return an + index before the update of the memory indicated by the corresponding + \VAR{ivars} element is fully complete. 
} \apireturnvalues{ diff --git a/content/shmem_test_some.tex b/content/shmem_test_some.tex index b1ab8b4d5..a027fa17a 100644 --- a/content/shmem_test_some.tex +++ b/content/shmem_test_some.tex @@ -70,8 +70,9 @@ elements in \VAR{ivars} are included in the test set. The \VAR{ivars}, \VAR{indices}, and \VAR{status} arrays must not overlap in memory. - Implementations must ensure that \FUNC{shmem\_test\_some} does not return 1 - before the update of the memory indicated by \VAR{ivar} is fully complete. + Implementations must ensure that \FUNC{shmem\_test\_some} does not return + indices before the updates of the memory indicated by the corresponding + \VAR{ivars} elements are fully complete. } \apireturnvalues{ diff --git a/content/shmem_wait_until_all.tex b/content/shmem_wait_until_all.tex index 1a4be4f52..31838b058 100644 --- a/content/shmem_wait_until_all.tex +++ b/content/shmem_wait_until_all.tex @@ -51,7 +51,7 @@ and \VAR{status} arrays must not overlap in memory. Implementations must ensure that \FUNC{shmem\_wait\_until\_all} does not - return before the update of the memory indicated by \VAR{ivar} is fully + return before the update of the memory indicated by \VAR{ivars} is fully complete. } diff --git a/content/shmem_wait_until_any.tex b/content/shmem_wait_until_any.tex index 97eb7a1c4..a7a4ef529 100644 --- a/content/shmem_wait_until_any.tex +++ b/content/shmem_wait_until_any.tex @@ -52,7 +52,7 @@ arrays must not overlap in memory. Implementations must ensure that \FUNC{shmem\_wait\_until\_any} does not - return before the update of the memory indicated by \VAR{ivar} is fully + return before the update of the memory indicated by \VAR{ivars} is fully complete. } diff --git a/content/shmem_wait_until_some.tex b/content/shmem_wait_until_some.tex index f18e250be..c70f091fb 100644 --- a/content/shmem_wait_until_some.tex +++ b/content/shmem_wait_until_some.tex @@ -68,7 +68,7 @@ memory. 
Implementations must ensure that \FUNC{shmem\_wait\_until\_some} does not - return before the update of the memory indicated by \VAR{ivar} is fully + return before the update of the memory indicated by \VAR{ivars} is fully complete. } From c34548c5a68957fe90976c8836fd19d55fdfc51a Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 5 Aug 2019 14:22:18 -0400 Subject: [PATCH 256/319] Add AMO text to wait/test multiple APIs Signed-off-by: James Dinan --- content/shmem_test_all.tex | 5 ++++- content/shmem_test_any.tex | 5 ++++- content/shmem_test_some.tex | 5 ++++- content/shmem_wait_until_all.tex | 5 ++++- content/shmem_wait_until_any.tex | 5 ++++- content/shmem_wait_until_some.tex | 5 ++++- 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/content/shmem_test_all.tex b/content/shmem_test_all.tex index 62cc709ec..c54f9a20c 100644 --- a/content/shmem_test_all.tex +++ b/content/shmem_test_all.tex @@ -34,7 +34,10 @@ \apidescription{ The \FUNC{shmem\_test\_all} routine indicates whether all entries in the test set specified by \VAR{ivars} and \VAR{status} have satisfied the test - condition at the calling \ac{PE}. This routine does not block and returns zero if + condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling + \ac{PE} may be updated by an \ac{AMO} performed by a thread located within + the calling \ac{PE} or within another \ac{PE}. + This routine does not block and returns zero if not all entries in \VAR{ivars} satisfied the test condition. 
This routine compares each of the \VAR{nelems} elements in the \VAR{ivars} array with the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} diff --git a/content/shmem_test_any.tex b/content/shmem_test_any.tex index 01b4a1d63..80182a549 100644 --- a/content/shmem_test_any.tex +++ b/content/shmem_test_any.tex @@ -35,7 +35,10 @@ \apidescription{ The \FUNC{shmem\_test\_any} routine indicates whether any entry in the test set specified by \VAR{ivars} and \VAR{status} has satisfied the test - condition at the calling \ac{PE}. This routine does not block and returns \CONST{SIZE\_MAX} if + condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling + \ac{PE} may be updated by an \ac{AMO} performed by a thread located within + the calling \ac{PE} or within another \ac{PE}. + This routine does not block and returns \CONST{SIZE\_MAX} if no entries in \VAR{ivars} satisfied the test condition. This routine compares each of the \VAR{nelems} elements in the \VAR{ivars} array with the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} diff --git a/content/shmem_test_some.tex b/content/shmem_test_some.tex index a027fa17a..ed9492176 100644 --- a/content/shmem_test_some.tex +++ b/content/shmem_test_some.tex @@ -37,7 +37,10 @@ \apidescription{ The \FUNC{shmem\_test\_some} routine indicates whether at least one entry in the test set specified by \VAR{ivars} and \VAR{status} satisfies the - test condition at the calling \ac{PE}. This routine does not block and returns zero if + test condition at the calling \ac{PE}. The \VAR{ivars} objects at the + calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located + within the calling \ac{PE} or within another \ac{PE}. + This routine does not block and returns zero if no entries in \VAR{ivars} satisfied the test condition. 
This routine compares each element of the \VAR{ivars} array in the test set with the value \VAR{cmp\_value} according to the comparison operator \VAR{cmp} at diff --git a/content/shmem_wait_until_all.tex b/content/shmem_wait_until_all.tex index 31838b058..59295d775 100644 --- a/content/shmem_wait_until_all.tex +++ b/content/shmem_wait_until_all.tex @@ -34,7 +34,10 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_all} routine waits until all entries in the wait set specified by \VAR{ivars} and \VAR{status} have satisfied the wait condition at the - calling \ac{PE}. If \VAR{nelems} is 0, the wait set is empty and this routine returns immediately. + calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be + updated by an \ac{AMO} performed by a thread located within the calling + \ac{PE} or within another \ac{PE}. + If \VAR{nelems} is 0, the wait set is empty and this routine returns immediately. This routine is semantically similar to \FUNC{shmem\_wait\_until} in Section~\ref{subsec:shmem_wait_until}, but adds support for point-to-point synchronization involving an array of diff --git a/content/shmem_wait_until_any.tex b/content/shmem_wait_until_any.tex index a7a4ef529..7c78731f0 100644 --- a/content/shmem_wait_until_any.tex +++ b/content/shmem_wait_until_any.tex @@ -35,7 +35,10 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_any} routine waits until any one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait - condition at the calling \ac{PE}. The order in which these elements are + condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling + \ac{PE} may be updated by an \ac{AMO} performed by a thread located within + the calling \ac{PE} or within another \ac{PE}. + The order in which these elements are waited upon is unspecified. If an entry $i$ in \VAR{ivars} within the wait set satisfies the wait condition, a series of calls to \FUNC{shmem\_wait\_until\_any} must eventually return $i$. 
diff --git a/content/shmem_wait_until_some.tex b/content/shmem_wait_until_some.tex index c70f091fb..eeb45bcab 100644 --- a/content/shmem_wait_until_some.tex +++ b/content/shmem_wait_until_some.tex @@ -37,7 +37,10 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_some} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the - wait condition at the calling \ac{PE}. This routine tests all elements of + wait condition at the calling \ac{PE}. The \VAR{ivars} objects at the + calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located + within the calling \ac{PE} or within another \ac{PE}. + This routine tests all elements of \VAR{ivars} in the wait set at least once, and the order in which the elements are waited upon is unspecified. From ade3c480c7c43e18ec019ae7f984d3cdcc497503 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 5 Aug 2019 14:44:19 -0400 Subject: [PATCH 257/319] Update examples to use AMOs with wait/test Signed-off-by: James Dinan --- example_code/shmem_test_any_example.c | 2 +- example_code/shmem_test_example1.c | 2 +- example_code/shmem_test_some_example.c | 4 ++-- example_code/shmem_wait3_example.c | 1 - example_code/shmem_wait_until_all.c | 2 +- example_code/shmem_wait_until_any_all2all_sum.c | 2 +- example_code/shmem_wait_until_some_all2all_sum.c | 2 +- 7 files changed, 7 insertions(+), 8 deletions(-) diff --git a/example_code/shmem_test_any_example.c b/example_code/shmem_test_any_example.c index 144d2f0eb..64c784c3e 100644 --- a/example_code/shmem_test_any_example.c +++ b/example_code/shmem_test_any_example.c @@ -11,7 +11,7 @@ int main(void) int *status = calloc(npes, sizeof(int)); for (int i = 0; i < npes; i++) - shmem_p(&flags[mype], 1, i); + shmem_atomic_set(&flags[mype], 1, i); int ncompleted = 0; size_t completed_idx; diff --git a/example_code/shmem_test_example1.c b/example_code/shmem_test_example1.c index d3f760f44..8c0f5c9e9 100644 --- 
a/example_code/shmem_test_example1.c +++ b/example_code/shmem_test_example1.c @@ -22,7 +22,7 @@ int main(void) printf("PE %d observed first update from PE %d\n", mype, who); } else - shmem_p(&wait_vars[mype], mype, 0); + shmem_atomic_set(&wait_vars[mype], mype, 0); shmem_free(wait_vars); shmem_finalize(); diff --git a/example_code/shmem_test_some_example.c b/example_code/shmem_test_some_example.c index 27cfb9db3..9c73a6cdc 100644 --- a/example_code/shmem_test_some_example.c +++ b/example_code/shmem_test_some_example.c @@ -27,12 +27,12 @@ int main(void) shmem_fence(); for (int i = 0; i < npes; i++) - shmem_p(&flags[mype], 1, i); + shmem_atomic_set(&flags[mype], 1, i); int ncompleted = 0; while (ncompleted < npes) { - int ntested = shmem_test_some(flags, npes, indices, status, SHMEM_CMP_NE, 0); + int ntested = shmem_test_some(flags, npes, indices, status, SHMEM_CMP_NE, 0); if (ntested > 0) { for (int i = 0; i < ntested; i++) { for (int j = 0; j < N; j++) { diff --git a/example_code/shmem_wait3_example.c b/example_code/shmem_wait3_example.c index 2a6c3ee04..2b0e85dfd 100644 --- a/example_code/shmem_wait3_example.c +++ b/example_code/shmem_wait3_example.c @@ -1,4 +1,3 @@ -#include #include int ivar; diff --git a/example_code/shmem_wait_until_all.c b/example_code/shmem_wait_until_all.c index 8d530cf6d..ed2eacaf7 100644 --- a/example_code/shmem_wait_until_all.c +++ b/example_code/shmem_wait_until_all.c @@ -10,7 +10,7 @@ int main(void) int *status = NULL; for (int i = 0; i < npes; i++) - shmem_p(&flags[mype], 1, i); + shmem_atomic_set(&flags[mype], 1, i); shmem_wait_until_all(flags, npes, status, SHMEM_CMP_EQ, 1); diff --git a/example_code/shmem_wait_until_any_all2all_sum.c b/example_code/shmem_wait_until_any_all2all_sum.c index eb139071f..3317fec6a 100644 --- a/example_code/shmem_wait_until_any_all2all_sum.c +++ b/example_code/shmem_wait_until_any_all2all_sum.c @@ -26,7 +26,7 @@ int main(void) shmem_fence(); for (int i = 0; i < npes; i++) - shmem_p(&flags[mype], 1, i); + 
shmem_atomic_set(&flags[mype], 1, i); for (int i = 0; i < npes; i++) { size_t completed_idx = shmem_wait_until_any(flags, npes, status, SHMEM_CMP_NE, 0); diff --git a/example_code/shmem_wait_until_some_all2all_sum.c b/example_code/shmem_wait_until_some_all2all_sum.c index 83caa6c5c..1c1a2f951 100644 --- a/example_code/shmem_wait_until_some_all2all_sum.c +++ b/example_code/shmem_wait_until_some_all2all_sum.c @@ -27,7 +27,7 @@ int main(void) shmem_fence(); for (int i = 0; i < npes; i++) - shmem_p(&flags[mype], 1, i); + shmem_atomic_set(&flags[mype], 1, i); size_t ncompleted; while ((ncompleted = shmem_wait_until_some(flags, npes, indices, From c7f002a31bfdc1e90de8d42841404ecd912e935f Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Thu, 8 Aug 2019 15:43:38 -0400 Subject: [PATCH 258/319] Change team_split2D example comment to latex prose Signed-off-by: David M. Ozog --- content/shmem_team_split_2d.tex | 95 +++++++++++++++++++++++++++++- example_code/shmem_team_split_2D.c | 86 ++------------------------- utils/packages.tex | 1 + 3 files changed, 99 insertions(+), 83 deletions(-) diff --git a/content/shmem_team_split_2d.tex b/content/shmem_team_split_2d.tex index bfed1b43d..f1a1ef829 100644 --- a/content/shmem_team_split_2d.tex +++ b/content/shmem_team_split_2d.tex @@ -145,7 +145,100 @@ to generate a 3D Cartesian split. This method can be extrapolated to generate splits of any number of dimensions.} {./example_code/shmem_team_split_2D.c} - {} + { + The example above splits \LibHandleRef{SHMEM\_TEAM\_WORLD} into a 3D team + with dimensions $3 \times 4 \times N$. For example, if \VAR{npes} = 16, \VAR{xdim} = 3, + and \VAR{ydim} = 4, then the final dimensions are $3 \times 4 \times 2$. 
In this case, the + first split of \LibHandleRef{SHMEM\_TEAM\_WORLD} results in 6 \VAR{xteams} + and 3 \VAR{yzteams}: + + \begin{center} + \begin{tabular}{|l|l|l|l|l|} + \hline + \multicolumn{2}{|c|}{} & \multicolumn{3}{c|}{\VAR{yzteam}} \\ \cline{3-5} + \multicolumn{2}{|c|}{} & \VAR{x} = 0 & \VAR{x} = 1 & \VAR{x} = 2 \\ \hline +\multirow{6}{*}{\VAR{xteam}} & \VAR{yz} = 0 & 0 & 1 & 2 \\ \cline{2-5} + & \VAR{yz} = 1 & 3 & 4 & 5 \\ \cline{2-5} + & \VAR{yz} = 2 & 6 & 7 & 8 \\ \cline{2-5} + & \VAR{yz} = 3 & 9 & 10 & 11 \\ \cline{2-5} + & \VAR{yz} = 4 & 12 & 13 & 14 \\ \cline{2-5} + & \VAR{yz} = 5 & 15 \\ + \cline{0-2} + \end{tabular} + \end{center} + + The second split of \VAR{yzteam} for \VAR{x} = 0, \VAR{ydim} = 4 results in 2 + \VAR{yteams} and 4 \VAR{zteams}: + + + \begin{center} + \begin{tabular}{|l|l|l|l|l|l|} + \hline + \multicolumn{2}{|c|}{} & \multicolumn{4}{c|}{\VAR{zteam}} \\ \cline{3-6} + \multicolumn{2}{|c|}{} & \VAR{y} = 0 & \VAR{y} = 1 & \VAR{y} = 2 & \VAR{y} = 3 \\ \hline +\multirow{2}{*}{\VAR{yteam}} & \VAR{z} = 0 & 0 & 3 & 6 & 9 \\ \cline{2-6} + & \VAR{z} = 1 & 12 & 15 \\ + \cline{0-3} + \end{tabular} + \end{center} + + The second split of \VAR{yzteam} for \VAR{x} = 1, \VAR{ydim} = 4 results in + 2 \VAR{yteams} and 4 \VAR{zteams}: + + \begin{center} + \begin{tabular}{|l|l|l|l|l|l|} + \hline + \multicolumn{2}{|c|}{} & \multicolumn{4}{c|}{\VAR{zteam}} \\ \cline{3-6} + \multicolumn{2}{|c|}{} & \VAR{y} = 0 & \VAR{y} = 1 & \VAR{y} = 2 & \VAR{y} = 3 \\ \hline +\multirow{2}{*}{\VAR{yteam}} & \VAR{z} = 0 & 1 & 4 & 7 & 10 \\ \cline{2-6} + & \VAR{z} = 1 & 13 \\ + \cline{0-2} + \end{tabular} + \end{center} + + The second split of \VAR{yzteam} for \VAR{x} = 2, \VAR{ydim} = 4 results in + 2 \VAR{yteams} and 4 \VAR{zteams}: + + \begin{center} + \begin{tabular}{|l|l|l|l|l|l|} + \hline + \multicolumn{2}{|c|}{} & \multicolumn{4}{c|}{\VAR{zteam}} \\ \cline{3-6} + \multicolumn{2}{|c|}{} & \VAR{y} = 0 & \VAR{y} = 1 & \VAR{y} = 2 & \VAR{y} = 3 \\ \hline 
+\multirow{2}{*}{\VAR{yteam}} & \VAR{z} = 0 & 2 & 5 & 8 & 11 \\ \cline{2-6} + & \VAR{z} = 1 & 14 \\ + \cline{0-2} + \end{tabular} + \end{center} + + The final number of teams for each dimension are: + \begin{itemize} + \item 6 \VAR{xteams}: these are teams where (\VAR{z},\VAR{y}) is fixed and \VAR{x} varies. + \item 6 \VAR{yteams}: these are teams where (\VAR{x},\VAR{z}) is fixed and \VAR{y} varies. + \item 12 \VAR{zteams}: these are teams where (\VAR{x},\VAR{y}) is fixed and \VAR{z} varies. + \end{itemize} + + The expected output is: \\ + \begin{small} + \texttt{ + (0, 0, 0) is me = 0 \\ + (1, 0, 0) is me = 1 \\ + (2, 0, 0) is me = 2 \\ + (0, 1, 0) is me = 3 \\ + (1, 1, 0) is me = 4 \\ + (2, 1, 0) is me = 5 \\ + (0, 2, 0) is me = 6 \\ + (1, 2, 0) is me = 7 \\ + (2, 2, 0) is me = 8 \\ + (0, 3, 0) is me = 9 \\ + (1, 3, 0) is me = 10 \\ + (2, 3, 0) is me = 11 \\ + (0, 0, 1) is me = 12 \\ + (1, 0, 1) is me = 13 \\ + (2, 0, 1) is me = 14 \\ + (0, 1, 1) is me = 15 + } + \end{small} +} \end{apiexamples} diff --git a/example_code/shmem_team_split_2D.c b/example_code/shmem_team_split_2D.c index 678fb70e9..9dbf63fd4 100644 --- a/example_code/shmem_team_split_2D.c +++ b/example_code/shmem_team_split_2D.c @@ -1,11 +1,11 @@ #include #include -int main(void) +int main(void) { int xdim = 3; int ydim = 4; - + shmem_init(); int pe = shmem_my_pe(); int npes = shmem_n_pes(); @@ -15,9 +15,9 @@ int main(void) exit(1); } - int zdim = (npes / (xdim*ydim)) + ( ((npes % (xdim*ydim)) > 0) ? 1 : 0 ); + int zdim = (npes / (xdim*ydim)) + ( ((npes % (xdim*ydim)) > 0) ? 
1 : 0 ); shmem_team_t xteam, yzteam, yteam, zteam; - + shmem_team_split_2d(SHMEM_TEAM_WORLD, xdim, NULL, 0, &xteam, NULL, 0, &yzteam); // No synchronization is needed between these split operations // yzteam is immediately ready to be used in collectives @@ -41,81 +41,3 @@ int main(void) shmem_finalize(); } - - -/* -/* Example split of SHMEM_TEAM_WORLD, size 16 into 3D -/* xdim = 3, ydim = 4 -> final dimensions are 3x4x2 -/* -/* First split of SHMEM_TEAM_WORLD, xdim=3 -/* results in 6 xteams and 3 yzteam -/********************************************** -/* x=0 | x=1 | x=2 | -/* ------------------- -/* yz=0 | 0 | 1 | 2 | <-- xteam -/* yz=1 | 3 | 4 | 5 | <-- xteam -/* yz=2 | 6 | 7 | 8 | <-- xteam -/* yz=3 | 9 | 10 | 11 | <-- xteam -/* yz=4 | 12 | 13 | 14 | <-- xteam -/* yz=5 | 15 | | <-- xteam -/* ^ ^ ^ -/* { yzteams are columns } -/********************************************** -/* -/* Second split of yzteam for x=0, ydim=4 -/* results in 2 yteams and 4 zteams -/********************************************** -/* y=0 | y=1 | y=2 | y=3 | -/* ------------------------- -/* z=0 | 0 | 3 | 6 | 9 | <-- yteam -/* z=1 | 12 | 15 | | <-- yteam -/* ^ ^ ^ ^ -/* { zteams are columns } -/********************************************** -/* -/* Second split of yzteam for x=1, ydim=4 -/* results in 2 yteams and 4 zteams -/********************************************** -/* y=0 | y=1 | y=2 | y=3 | -/* ------------------------- -/* z=0 | 1 | 4 | 7 | 10 | <-- yteam -/* z=1 | 13 | | | <-- yteam -/* ^ ^ ^ ^ -/* { zteams are columns } -/********************************************** -/* -/* Second split of yzteam for x=2, ydim=4 -/* results in 2 yteams and 4 zteams -/********************************************** -/* y=0 | y=1 | y=2 | y=3 | -/* ------------------------- -/* z=0 | 2 | 5 | 8 | 11 | <-- yteam -/* z=1 | 14 | | | <-- yteam -/* ^ ^ ^ ^ -/* { zteams are columns } -/********************************************** -/* -/* Final number of teams for each dimension: -/* 6 xteams, these 
are teams where (z,y) is fixed and x varies -/* 6 yteams, these are teams where (x,z) is fixed and y varies -/* 12 zteams, these are teams where (x,y) is fixed and z varies -/* -/* Expected output: -/* (0, 0, 0) is me = 0 -/* (1, 0, 0) is me = 1 -/* (2, 0, 0) is me = 2 -/* (0, 1, 0) is me = 3 -/* (1, 1, 0) is me = 4 -/* (2, 1, 0) is me = 5 -/* (0, 2, 0) is me = 6 -/* (1, 2, 0) is me = 7 -/* (2, 2, 0) is me = 8 -/* (0, 3, 0) is me = 9 -/* (1, 3, 0) is me = 10 -/* (2, 3, 0) is me = 11 -/* (0, 0, 1) is me = 12 -/* (1, 0, 1) is me = 13 -/* (2, 0, 1) is me = 14 -/* (0, 1, 1) is me = 15 -*/ - diff --git a/utils/packages.tex b/utils/packages.tex index ae18885e0..d3f54c6c4 100644 --- a/utils/packages.tex +++ b/utils/packages.tex @@ -3,6 +3,7 @@ \usepackage[utf8]{inputenc} \usepackage{graphicx} \usepackage{multicol} +\usepackage{multirow} \usepackage[normalem]{ulem} \usepackage{float} \usepackage[usenames,dvipsnames]{color} From a0c57d1b6e8ccb18d599a91c20ca7d1b965764b3 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Sat, 24 Aug 2019 06:28:55 +0530 Subject: [PATCH 259/319] Fix argument info for shmem_signal_wait_until --- content/shmem_signal_wait_until.tex | 2 +- main_spec.tex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex index 81502a0b7..dbd6225b0 100644 --- a/content/shmem_signal_wait_until.tex +++ b/content/shmem_signal_wait_until.tex @@ -12,7 +12,7 @@ \begin{apiarguments} -\apiargument{OUT}{sig\_addr}{A pointer to a remotely accessible variable.} +\apiargument{IN}{sig\_addr}{A pointer to a remotely accessible variable.} \apiargument{IN}{cmp}{The comparison operator that compares \VAR{sig\_addr} with \VAR{cmp\_value}.} \apiargument{IN}{cmp\_value}{The value against which the object pointed to diff --git a/main_spec.tex b/main_spec.tex index b3e201b23..3e883e829 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -23,7 +23,7 @@ \section{Execution 
Model}\label{subsec:execution_model} \input{content/execution_model} \section{Language Bindings and Conformance}\label{subsec:bindings} -\input{content/language_bindings_and_conformance}re +\input{content/language_bindings_and_conformance} \section{Library Constants}\label{subsec:library_constants} \input{content/library_constants} From 065581b9c6a81b9f3b04e136ec68a053dece2d15 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 30 Aug 2019 03:37:57 -0500 Subject: [PATCH 260/319] Fix the NBI AMO descriptions based on May Reading The following minor changes were made to the NBI AMO proposal based on the May 2019 F2F Reading: 1. Change the performs/fetches usage to perform/fetch accordingly 2. Change posting to initiating when explaining the NBI nature of APIs 3. Avoid using "as one atomic operation" to remove the ambiguity in when the value is updated on the remote side and value is fetched on the src side. --- content/shmem_atomic_compare_swap_nbi.tex | 8 ++++---- content/shmem_atomic_fetch_add_nbi.tex | 8 ++++---- content/shmem_atomic_fetch_and_nbi.tex | 11 +++++------ content/shmem_atomic_fetch_inc_nbi.tex | 8 ++++---- content/shmem_atomic_fetch_nbi.tex | 6 +++--- content/shmem_atomic_fetch_or_nbi.tex | 6 +++--- content/shmem_atomic_fetch_xor_nbi.tex | 11 +++++------ content/shmem_atomic_swap_nbi.tex | 8 ++++---- 8 files changed, 32 insertions(+), 34 deletions(-) diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex index dbca8b7c8..078b5868a 100644 --- a/content/shmem_atomic_compare_swap_nbi.tex +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - The nonblocking atomic routine provides a method for performing an atomic + The nonblocking atomic routine provide a method for performing an atomic conditional swap on a remote data object. 
} @@ -39,9 +39,9 @@ \apidescription{ The nonblocking conditional swap routines conditionally update a \VAR{dest} - data object on the specified \ac{PE} and fetches the prior contents of the - \VAR{dest} data object into the \VAR{fetch} local data object as one atomic - operation. This routine returns after posting the operation. The operation + data object on the specified \ac{PE} and fetch the prior contents of the + \VAR{dest} data object into the \VAR{fetch} local data object. + This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, prior contents of the \VAR{dest} data object is atomically fetched into \VAR{fetch} local data object and the diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index ff59fedfc..a6cacaa71 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - The nonblocking atomic routine performs an atomic fetch-and-add operation on + The nonblocking atomic routines perform an atomic fetch-and-add operation on a remote data object. } @@ -36,14 +36,14 @@ \apidescription{ The nonblocking \FUNC{shmem\_atomic\_fetch\_add\_nbi} routines perform an - atomic fetch-and-add operation. An atomic fetch-and-add operation fetches + atomic fetch-and-add operation. An atomic fetch-and-add operation fetches the old \VAR{dest} and adds \VAR{value} to \VAR{dest} without the possibility of another atomic operation on the \VAR{dest} between the time - of the fetch and the update. This routine returns after posting the + of the fetch and the update. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. 
At the completion of \FUNC{shmem\_quiet}, \VAR{value} is added to \VAR{dest} on \VAR{pe} and the prior contents of \VAR{dest} - are fetched into the \VAR{fetch} local data object as one atomic operation. + are fetched into the \VAR{fetch} local data object. } \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index 49bc81601..233ef1265 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic operation performs an atomic fetching bitwise AND + This nonblocking atomic operation perform an atomic fetching bitwise AND operation on a remote data object. } @@ -35,15 +35,14 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines performs an + The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines perform an atomic fetching bitwise AND on the remotely accessible data object pointed by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine - returns after posting the operation. The operation is considered complete + returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines performs a fetching bitwise AND on + \FUNC{shmem\_quiet}, these routines perform a fetching bitwise AND on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior - contents of \VAR{dest} into the \VAR{fetch} local data object as one atomic - operation. + contents of \VAR{dest} into the \VAR{fetch} local data object. 
} \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index 869fe59f3..5429f3d1c 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic routine performs an atomic fetch-and-increment + This nonblocking atomic routine perform an atomic fetch-and-increment operation on a remote data object. } @@ -35,12 +35,12 @@ \apidescription{ - These nonblocking \FUNC{shmem\_atomic\_fetch\_inc\_nbi} routines perform an - atomic fetch-and-increment operation. This routine returns after posting the + The nonblocking \FUNC{shmem\_atomic\_fetch\_inc\_nbi} routines perform an + atomic fetch-and-increment operation. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the \dest{} on \ac{PE} \VAR{pe} is increased by one and the previous contents of \dest{} are - fetched into the \VAR{fetch} local data object as one atomic operation. + fetched into the \VAR{fetch} local data object. } \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex index e8ffa160d..8ee4157e9 100644 --- a/content/shmem_atomic_fetch_nbi.tex +++ b/content/shmem_atomic_fetch_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - The nonblocking atomic fetch routine provides a method for atomically + The nonblocking atomic fetch routine provide a method for atomically fetching the value of a remote data object. } @@ -34,9 +34,9 @@ \end{apiarguments} \apidescription{ - The nonblocking atomic fetch routine performs a nonblocking fetching of a + The nonblocking atomic fetch routines perform a nonblocking fetch of a value atomically from a remote data object. This routine returns after - posting the operation. The operation is considered complete after a + initiating the operation. 
The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, contents of the \source{} data object from \ac{PE} is atomically fetched into \VAR{fetch} local data object. diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index d66fdf8ca..352f3e00d 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic operation performs an atomic fetching bitwise OR + This nonblocking atomic operation perform an atomic fetching bitwise OR operation on a remote data object. } @@ -35,12 +35,12 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines performs an + The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines perform an atomic fetching bitwise OR on the remotely accessible data object pointed by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine returns after posting the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines performs a fetching bitwise OR on + \FUNC{shmem\_quiet}, these routines perform a fetching bitwise OR on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior contents of \VAR{dest} into the \VAR{fetch} local data object as one atomic operation. diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index b0a2ba4fb..28e2f0bd7 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic operation performs an atomic fetching bitwise XOR + This nonblocking atomic operation perform an atomic fetching bitwise XOR operation on a remote data object. 
} @@ -35,15 +35,14 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines performs an + The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines perform an atomic fetching bitwise XOR on the remotely accessible data object pointed by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine - returns after posting the operation. The operation is considered complete + returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines performs a fetching bitwise XOR on + \FUNC{shmem\_quiet}, these routines perform a fetching bitwise XOR on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior - contents of \VAR{dest} into the \VAR{fetch} local data object as one atomic - operation. + contents of \VAR{dest} into the \VAR{fetch} local data object. } \apireturnvalues{ diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex index 620b1feb3..ac886e5db 100644 --- a/content/shmem_atomic_swap_nbi.tex +++ b/content/shmem_atomic_swap_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic operation performs an atomic swap to a remote data + This nonblocking atomic operation perform an atomic swap to a remote data object. } @@ -35,11 +35,11 @@ \apidescription{ The nonblocking \FUNC{shmem\_atomic\_swap\_nbi} routines perform an atomic - swap operation. This routine returns after posting the operation. The + swap operation. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, it writes - \VAR{value} into \dest{} on \ac{PE} and fetches the prior contents of - \dest{} into \VAR{fetch} local data object as one atomic operation. 
+ \VAR{value} into \dest{} on \ac{PE} and fetches the prior contents of + \dest{} into \VAR{fetch} local data object. } \apireturnvalues{ From 28cb0848996a67ae94e0efd557bb16ece82231d1 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 30 Aug 2019 04:02:17 -0500 Subject: [PATCH 261/319] Add NBI AMOs in the fence and quiet routines --- content/shmem_fence.tex | 24 ++++++++++++------------ content/shmem_quiet.tex | 18 +++++++++--------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index f86b83c3a..f7ac001ab 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -1,5 +1,5 @@ \apisummary{ - Assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines + Assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} routines to symmetric data objects. } @@ -21,12 +21,12 @@ \end{apiarguments} \apidescription{ - This routine assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} - routines to symmetric data objects. All \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} - routines to symmetric data objects issued to a particular remote \ac{PE} + This routine assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} + routines to symmetric data objects. All \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} + routines to symmetric data objects issued to a particular remote \ac{PE} on the given context prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any - subsequent \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines to symmetric data + subsequent \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} routines to symmetric data objects to the same \ac{PE}. \FUNC{shmem\_fence} guarantees order of delivery, not completion. 
It does not guarantee order of delivery of nonblocking \GET{} routines. } @@ -37,19 +37,19 @@ \apinotes{ \FUNC{shmem\_fence} only provides per-\ac{PE} ordering guarantees and does not - guarantee completion of delivery. - \FUNC{shmem\_fence} also does not have an effect on the ordering between memory + guarantee completion of delivery. + \FUNC{shmem\_fence} also does not have an effect on the ordering between memory accesses issued by the target PE. \FUNC{shmem\_wait\_until}, \FUNC{shmem\_test}, - \FUNC{shmem\_barrier}, \FUNC{shmem\_barrier\_all} routines can be called by the target PE to guarantee + \FUNC{shmem\_barrier}, \FUNC{shmem\_barrier\_all} routines can be called by the target PE to guarantee ordering of its memory accesses. There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} - guarantees completion of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines to + guarantees completion of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} routines to symmetric data objects which makes the updates visible to all other - \acp{PE}. - + \acp{PE}. + The \FUNC{shmem\_quiet} routine should be called if completion of \PUT{}, - \ac{AMO}, memory store, and nonblocking \PUT{} routines to symmetric data objects is desired + \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} routines to symmetric data objects is desired when multiple remote \acp{PE} are involved. In an \openshmem program with multithreaded \acp{PE}, it is the diff --git a/content/shmem_quiet.tex b/content/shmem_quiet.tex index 35791b47a..f6ff91fd6 100644 --- a/content/shmem_quiet.tex +++ b/content/shmem_quiet.tex @@ -1,6 +1,6 @@ \apisummary{ Waits for completion of all outstanding \PUT{}, \ac{AMO}, memory store, - and nonblocking \PUT{} and \GET{} routines to symmetric data + and nonblocking \PUT{}, \GET{}, and \ac{AMO} routines to symmetric data objects issued by a \ac{PE}. 
} @@ -21,13 +21,13 @@ \CONST{SHMEM\_CTX\_DEFAULT}.} \end{apiarguments} -\apidescription{ +\apidescription{ The \FUNC{shmem\_quiet} routine ensures completion of \PUT{}, \ac{AMO}, - memory store, and nonblocking \PUT{} and \GET{} routines on + memory store, and nonblocking \PUT{}, \GET{} and \ac{AMO} routines on symmetric data objects issued by the calling \ac{PE} on the given context. All \PUT{}, \ac{AMO}, - memory store, and nonblocking \PUT{} and \GET{} routines to + memory store, and nonblocking \PUT{}, \GET{} and \ac{AMO} routines to symmetric data objects are guaranteed to be completed and visible to all - \acp{PE} when \FUNC{shmem\_quiet} returns. + \acp{PE} when \FUNC{shmem\_quiet} returns. } @@ -35,17 +35,17 @@ None. } -\apinotes{ +\apinotes{ \FUNC{shmem\_quiet} is most useful as a way of ensuring completion of - several \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} - and \GET{} routines to symmetric data objects initiated by the calling + several \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{}, + \GET{} and \ac{AMO} routines to symmetric data objects initiated by the calling \ac{PE}. For example, one might use \FUNC{shmem\_quiet} to await delivery of a block of data before issuing another \PUT{} or nonblocking \PUT{} routine, which sets a completion flag on another \ac{PE}. \FUNC{shmem\_quiet} is not usually needed if \FUNC{shmem\_barrier\_all} or \FUNC{shmem\_barrier} are called. The barrier routines wait for the completion of outstanding writes (\PUT{}, \ac{AMO}, - memory stores, and nonblocking \PUT{} and \GET{} routines) to + memory stores, and nonblocking \PUT{}, \GET{} and \ac{AMO} routines) to symmetric data objects on all \acp{PE}. 
In an \openshmem program with multithreaded \acp{PE}, it is the From 47aef7ab2f005a851703631f0833ead6aa25fa08 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 30 Aug 2019 04:16:10 -0500 Subject: [PATCH 262/319] Fix minor and usage in the fence description Separate non-blocking put and put-with-signal in the fence description --- content/shmem_fence.tex | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index 2ca0a1df1..3156aebd9 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -1,6 +1,6 @@ \apisummary{ Assures ordering of delivery of \PUT{}, \ac{AMO}, \emph{put-with-signal}, - memory store, nonblocking \PUT{}, and \emph{put-with-signal} + memory store, and nonblocking \PUT{} and \emph{put-with-signal} routines to symmetric data objects. } @@ -22,14 +22,14 @@ \end{apiarguments} \apidescription{ - This routine assures ordering of delivery of \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, + This routine assures ordering of delivery of \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} and \emph{put-with-signal} - routines to symmetric data objects. All \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, + routines to symmetric data objects. All \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} and \emph{put-with-signal} routines to symmetric data objects issued to a particular remote \ac{PE} on the given context prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any - subsequent \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, and \emph{put-with-signal} + subsequent \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} and \emph{put-with-signal} routines to symmetric data objects to the same \ac{PE}. \FUNC{shmem\_fence} guarantees order of delivery, not completion. 
It does not guarantee order of delivery of nonblocking \GET{} routines. @@ -50,12 +50,12 @@ ordering of its memory accesses. There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} - guarantees completion of \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, and \emph{put-with-signal} routines to + guarantees completion of \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} and \emph{put-with-signal} routines to symmetric data objects which makes the updates visible to all other \acp{PE}. The \FUNC{shmem\_quiet} routine should be called if completion of \PUT{}, - \ac{AMO}, \emph{put-with-signal}, memory store, nonblocking \PUT{}, and \emph{put-with-signal} routines to symmetric data objects is desired + \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} and \emph{put-with-signal} routines to symmetric data objects is desired when multiple remote \acp{PE} are involved. In an \openshmem program with multithreaded \acp{PE}, it is the From 4d40101cac56d2163ce393e366fe6d2f846942eb Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Thu, 5 Sep 2019 14:37:18 -0400 Subject: [PATCH 263/319] Remove redundant fairness text in test_some_vector Signed-off-by: David M. Ozog --- content/shmem_test_some_vector.tex | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/content/shmem_test_some_vector.tex b/content/shmem_test_some_vector.tex index 4eb866899..2913f19a8 100644 --- a/content/shmem_test_some_vector.tex +++ b/content/shmem_test_some_vector.tex @@ -44,10 +44,7 @@ array in the test set with each respective value in \VAR{cmp\_values} according to the comparison operator \VAR{cmp} at the calling \ac{PE}}. This routine tests all elements of \VAR{ivars} in the test set at least - once, and the order in which the elements are tested is unspecified. 
If an - entry $i$ in \VAR{ivars} within the test set satisfies the test condition, - a series of calls to \FUNC{shmem\_test\_some\newtext{\_vector}} must - eventually return $i$. + once, and the order in which the elements are tested is unspecified. Upon return, the \VAR{indices} array contains the indices of the elements in the test set that satisfied the test condition during the call to From 4fae811b051dd962dff0357fb807568a1372daf3 Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Thu, 5 Sep 2019 15:08:59 -0400 Subject: [PATCH 264/319] Use shmem_set w/ wait_any_vector, fix verification Signed-off-by: David M. Ozog --- example_code/shmem_wait_until_any_vector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example_code/shmem_wait_until_any_vector.c b/example_code/shmem_wait_until_any_vector.c index 0977ed66c..266585e51 100644 --- a/example_code/shmem_wait_until_any_vector.c +++ b/example_code/shmem_wait_until_any_vector.c @@ -17,7 +17,7 @@ int main(void) /* All odd PEs put 2 and all even PEs put 1 */ for (int i = 0; i < npes; i++) { - shmem_p(&ivars[mype], mype % 2 + 1, i); + shmem_atomic_set(&ivars[mype], mype % 2 + 1, i); /* Set cmp_values to the expected values coming from each PE */ cmp_values[i] = i % 2 + 1; @@ -31,7 +31,7 @@ int main(void) } /* check the result */ - int correct_result = npes + npes / 2 + npes % 2; + int correct_result = npes + npes / 2; if (total_sum != correct_result) { shmem_global_exit(1); From df3eb6ced55153d24799dbcb8feb30782165ddab Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 14:08:20 -0500 Subject: [PATCH 265/319] Fix team context example variable naming --- example_code/shmem_team_context.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/example_code/shmem_team_context.c b/example_code/shmem_team_context.c index ab7dd12e6..57d8c9621 100644 --- a/example_code/shmem_team_context.c +++ b/example_code/shmem_team_context.c @@ -65,8 +65,6 @@ int main() // Create 
team with PEs numbered 0, 2, 4, ... shmem_team_spit_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, &conf, cmask, &team_2s); - // Sync between splits from same parent team into teams with overlapping membership - shmem_team_sync(SHMEM_TEAM_WORLD); // Create team with PEs numbered 0, 3, 6, ... shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 3, npes / 3, &conf, cmask, &team_3s); @@ -84,9 +82,9 @@ int main() // We will add up some results on pe 4 of team_3s using ctx_2s if ((team_3s != SHMEM_TEAM_INVALID) && (team_2s != SHMEM_TEAM_INVALID)) { - int _pe4_of_3s_in_2s = my_ctx_translate_pe(ctx_3s, 4, ctx_2s); + int pe4_of_3s_in_2s = my_ctx_translate_pe(ctx_3s, 4, ctx_2s); - if (_pe4_of_3s_in_2s < 0) { + if (pe4_of_3s_in_2s < 0) { fprintf (stderr, "Fail to translate pe 4 from 3s context to 2s context\n"); } else { From 649c41362ea29f3f8388c14a6dc54adaa2e6e285 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 14:18:27 -0500 Subject: [PATCH 266/319] Remove erroneous statement about team split synchronization --- content/teams_intro.tex | 4 ---- 1 file changed, 4 deletions(-) diff --git a/content/teams_intro.tex b/content/teams_intro.tex index 4bb06d5c2..7531eae69 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -63,10 +63,6 @@ \subsubsection*{Collective Ordering} of the \openshmem program to ensure the same ordering of collective routine calls across all \acp{PE} in a team. -There is no need for explicit synchronization between subsequent calls -to collective routines across the team, except in the special case discussed -below for team creation of overlapping child teams from a common parent team. - A full discussion of collective semantics follows in Section~\ref{subsec:coll}. 
\subsubsection*{Team Creation} From a8e439f1e70d8943bb7a1bafd1aae8788ff2f1f9 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 14:33:42 -0500 Subject: [PATCH 267/319] Clarify team numbering in newly created teams in teams intro --- content/teams_intro.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/teams_intro.tex b/content/teams_intro.tex index 7531eae69..ddca69dcc 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -80,6 +80,13 @@ \subsubsection*{Team Creation} This configuration argument is of type \CTYPE{shmem\_team\_config\_t}, which is detailed further in Section~\ref{subsec:shmem_team_config_t}. +\acp{PE} in a newly created team are consecutively numbered starting with +\ac{PE} number 0. \acp{PE} are always ordered by the existing global \ac{PE} number that +would be returned by the \FUNC{shmem\_my\_pe} routine. Team-relative \ac{PE} +numbers can be used for point-to-point operations through team-based +contexts (see Section~\ref{sec:ctx}) or using the translation routine +\FUNC{shmem\_team\_translate\_pe}. + As with any collective routine on a team, the program must ensure that there are no simultaneous split operations occurring on the same parent team on a given \ac{PE}, i.e. in separate threads. From 65161bbb27580a39442def5acfe9a5b0d0c53c9d Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 15:10:46 -0500 Subject: [PATCH 268/319] Add detail that team context creation is local event --- content/shmem_team_create_ctx.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_create_ctx.tex b/content/shmem_team_create_ctx.tex index c5ed2a5e1..dce63be92 100644 --- a/content/shmem_team_create_ctx.tex +++ b/content/shmem_team_create_ctx.tex @@ -1,5 +1,5 @@ \apisummary{ - Create a communication context from a team. + Create a communication context from a team locally. 
} \begin{apidefinition} From 41ed8575da67610c413fe22232887587c296cdc4 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 9 Sep 2019 15:12:21 -0500 Subject: [PATCH 269/319] Update list of APIs in fence and quiet --- content/shmem_fence.tex | 22 ++++++++++++++-------- content/shmem_quiet.tex | 25 +++++++++++++++---------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index 3156aebd9..839d8e0e7 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -1,6 +1,7 @@ \apisummary{ - Assures ordering of delivery of \PUT{}, \ac{AMO}, \emph{put-with-signal}, - memory store, and nonblocking \PUT{} and \emph{put-with-signal} + Assures ordering of delivery of memory store, blocking \PUT{}, + \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking + \PUT{}, and \emph{put-with-signal} routines to symmetric data objects. } @@ -22,14 +23,17 @@ \end{apiarguments} \apidescription{ - This routine assures ordering of delivery of \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} + This routine assures ordering of delivery of memory store, blocking \PUT{}, + \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking \PUT{}, and \emph{put-with-signal} - routines to symmetric data objects. All \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} + routines to symmetric data objects. 
All memory store, blocking \PUT{}, + \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking \PUT{}, and \emph{put-with-signal} routines to symmetric data objects issued to a particular remote \ac{PE} on the given context prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any - subsequent \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} and \emph{put-with-signal} + subsequent memory store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, + as well as nonblocking \PUT{}, and \emph{put-with-signal} routines to symmetric data objects to the same \ac{PE}. \FUNC{shmem\_fence} guarantees order of delivery, not completion. It does not guarantee order of delivery of nonblocking \GET{} routines. @@ -50,12 +54,14 @@ ordering of its memory accesses. There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} - guarantees completion of \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} and \emph{put-with-signal} routines to + guarantees completion of memory store, blocking \PUT{}, \ac{AMO}, and + \emph{put-with-signal}, as well as nonblocking \PUT{}, and \emph{put-with-signal} routines to symmetric data objects which makes the updates visible to all other \acp{PE}. - The \FUNC{shmem\_quiet} routine should be called if completion of \PUT{}, - \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{} and \emph{put-with-signal} routines to symmetric data objects is desired + The \FUNC{shmem\_quiet} routine should be called if completion of memory + store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as + nonblocking \PUT{}, and \emph{put-with-signal} routines to symmetric data objects is desired when multiple remote \acp{PE} are involved. 
In an \openshmem program with multithreaded \acp{PE}, it is the diff --git a/content/shmem_quiet.tex b/content/shmem_quiet.tex index ab3377a05..e3d9ae41e 100644 --- a/content/shmem_quiet.tex +++ b/content/shmem_quiet.tex @@ -1,7 +1,8 @@ \apisummary{ - Waits for completion of all outstanding \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, - and nonblocking \PUT{}, \emph{put-with-signal} and \GET{} routines to symmetric data - objects issued by a \ac{PE}. + Waits for completion of all outstanding memory store, blocking + \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as + nonblocking \PUT{}, \emph{put-with-signal}, and \GET{} routines + to symmetric data objects issued by a \ac{PE}. } \begin{apidefinition} @@ -22,10 +23,12 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_quiet} routine ensures completion of \PUT{}, \ac{AMO}, - \emph{put-with-signal}, memory store, and nonblocking \PUT{},\emph{put-with-signal}, and \GET{} routines on - symmetric data objects issued by the calling \ac{PE} on the given context. All \PUT{}, \ac{AMO}, - \emph{put-with-signal}, memory store, and nonblocking \PUT{}, \emph{put-with-signal}, and \GET{} routines to + The \FUNC{shmem\_quiet} routine ensures completion of memory store, blocking + \PUT{}, \ac{AMO}, and + \emph{put-with-signal}, as well as nonblocking \PUT{},\emph{put-with-signal}, and \GET{} routines on + symmetric data objects issued by the calling \ac{PE} on the given context. + All memory store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as + well as nonblocking \PUT{}, \emph{put-with-signal}, and \GET{} routines to symmetric data objects are guaranteed to be completed and visible to all \acp{PE} when \FUNC{shmem\_quiet} returns. 
If \VAR{ctx} has the value \CONST{SHMEM\_CTX\_INVALID}, no operation is @@ -39,15 +42,17 @@ \apinotes{ \FUNC{shmem\_quiet} is most useful as a way of ensuring completion of - several \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory store, and nonblocking \PUT{}, + several memory store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, + as well as nonblocking \PUT{}, \emph{put-with-signal}, and \GET{} routines to symmetric data objects initiated by the calling \ac{PE}. For example, one might use \FUNC{shmem\_quiet} to await delivery of a block of data before issuing another \PUT{} or nonblocking \PUT{} routine, which sets a completion flag on another \ac{PE}. \FUNC{shmem\_quiet} is not usually needed if \FUNC{shmem\_barrier\_all} or \FUNC{shmem\_barrier} are called. The barrier - routines wait for the completion of outstanding writes (\PUT{}, \ac{AMO}, - \emph{put-with-signal}, memory stores, and nonblocking \PUT{},\emph{put-with-signal}, and \GET{} routines) to + routines wait for the completion of outstanding writes (memory store, + blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking \PUT{}, + \emph{put-with-signal}, and \GET{} routines) to symmetric data objects on all \acp{PE}. 
In an \openshmem program with multithreaded \acp{PE}, it is the From c19ae5b25aa16ccc76a9c756e62cb44cb88f0ac5 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 15:28:07 -0500 Subject: [PATCH 270/319] Add clarification of effect of threading on shmem_sync --- content/shmem_sync.tex | 5 +++-- content/shmem_sync_all.tex | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 7f8c25fd9..e6af59ae6 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,7 +1,8 @@ \apisummary{ - \newtext{Registers the arrival of a \ac{PE} at a synchronization point and suspends \ac{PE} + \newtext{Registers the arrival of a \ac{PE} at a synchronization point and suspends execution until all other \acp{PE} in a given \openshmem team or active set - arrive at a synchronization point.} + arrive at a synchronization point. For multithreaded programs, execution is suspended + as specified by the threading model (Section \ref{subsec:thread_support}).} \oldtext{% Performs all operations described in the \FUNC{shmem\_sync\_all} interface but with respect to a subset of \acp{PE} defined by the active set. diff --git a/content/shmem_sync_all.tex b/content/shmem_sync_all.tex index 57a63e1d9..96937a9f7 100644 --- a/content/shmem_sync_all.tex +++ b/content/shmem_sync_all.tex @@ -1,6 +1,7 @@ \apisummary{ - Registers the arrival of a \ac{PE} at a \newtext{synchronization point} \oldtext{barrier} and suspends \ac{PE} - execution until all other \acp{PE} \newtext{in the default team} arrive at \newtext{a synchronization point} \oldtext{the barrier}. + Registers the arrival of a \ac{PE} at a \newtext{synchronization point} \oldtext{barrier} and suspends + execution until all other \acp{PE} \newtext{in the default team} arrive at \newtext{a synchronization point} \oldtext{the barrier}. 
\newtext{For multithreaded programs, execution is suspended + as specified by the threading model (Section \ref{subsec:thread_support})}. } \begin{apidefinition} From e6c9d9a86f7ab82438bb068374d23c0ea5b2b229 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 15:32:03 -0500 Subject: [PATCH 271/319] Remove fortran interface from shmem_collect table --- content/shmem_collect.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 07636d243..ed6cb4beb 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -127,10 +127,10 @@ constraints, which are as follows: }{Routine}{Data type of \VAR{dest} and \VAR{source}} \apitablerow{\FUNC{shmem\_collectmem}, \FUNC{shmem\_fcollectmem}}{\Cstd: Any data type. \VAR{nelems} is scaled in bytes.}% -\apitablerow{\FUNC{shmem\_collect8}, \FUNC{shmem\_collect64}, \FUNC{shmem\_fcollect8}, \FUNC{shmem\_fcollect64}}% +\apitablerow{\FUNC{shmem\_collect64}, \FUNC{shmem\_fcollect64}}% {Any noncharacter type that has an element size of \CONST{64} bits. No \Fortran derived types nor \CorCpp{} structures are allowed.} -\apitablerow{\FUNC{shmem\_collect4}, \FUNC{shmem\_collect32}, \FUNC{shmem\_fcollect4}, \FUNC{shmem\_fcollect32}}% +\apitablerow{\FUNC{shmem\_collect32}, \FUNC{shmem\_fcollect32}}% {Any noncharacter type that has an element size of \CONST{32} bits. No \Fortran derived types nor \CorCpp{} structures are allowed.} } From 62c1d1c98e6d9a5420ff9f54ec3d160fd38d3a85 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 15:36:50 -0500 Subject: [PATCH 272/319] Clarify behavior for invalid PE number in point-to-point --- main_spec.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main_spec.tex b/main_spec.tex index 935c3737a..edba9dbe8 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -188,6 +188,8 @@ \subsection{Communication Management Routines} explicitly or implicitly, are performed. 
All point-to-point routines that operate on this context will do so with respect to the team-relative \ac{PE} numbering of the associated team. +If the PE number passed to such a routine is invalid, being negative or greater +than or equal to the size of the \openshmem team, then the behavior is undefined. } \subsubsection{\textbf{SHMEM\_CTX\_CREATE}} From 838252d560b8dfbfe5490d786e415f2e262e5b0f Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 15:49:32 -0500 Subject: [PATCH 273/319] Remove C++ interface from C11 reduction example --- example_code/shmem_reduce_example.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/example_code/shmem_reduce_example.c b/example_code/shmem_reduce_example.c index 68977a0e4..24f8cbb2b 100644 --- a/example_code/shmem_reduce_example.c +++ b/example_code/shmem_reduce_example.c @@ -40,15 +40,8 @@ int main(void) /* Wait for all PEs to initialize reductions arrays */ shmem_sync(SHMEM_TEAM_WORLD); -#if __STDC_VERSION__ >= 201112 - /* C11 generic interface */ shmem_and_reduce(SHMEM_TEAM_WORLD, valid_all, valid_me, num); shmem_sum_reduce(SHMEM_TEAM_WORLD, sums, values, num); -#else - /* C/C++ interface without generic support */ - shmem_uchar_and_reduce(SHMEM_TEAM_WORLD, valid_all, valid_me, num); - shmem_long_sum_reduce(SHMEM_TEAM_WORLD, sums, values, num); -#endif for (int i=0; i < num; i++) { if (valid_all[i]) { From d37bed7600d4548a28b779bfb3cce0ef2038ccfb Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 15:54:12 -0500 Subject: [PATCH 274/319] Remove erroneous statements about inter-split synchronization --- example_code/shmem_sync_example.c | 4 ---- example_code/shmem_team_split_2D.c | 1 - 2 files changed, 5 deletions(-) diff --git a/example_code/shmem_sync_example.c b/example_code/shmem_sync_example.c index bff252880..2e367a428 100644 --- a/example_code/shmem_sync_example.c +++ b/example_code/shmem_sync_example.c @@ -18,10 +18,6 @@ int main(void) shmem_team_split_strided(SHMEM_TEAM_WORLD, 
0, 2, npes / 2, config, 0, &twos_team); - /* The "threes" team below overlaps with the "twos" team, so - * synchronize on the parent team */ - shmem_sync(SHMEM_TEAM_WORLD); - shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 3, npes / 3 + odd_npes, config, 0, &threes_team); diff --git a/example_code/shmem_team_split_2D.c b/example_code/shmem_team_split_2D.c index 9dbf63fd4..2059f3279 100644 --- a/example_code/shmem_team_split_2D.c +++ b/example_code/shmem_team_split_2D.c @@ -19,7 +19,6 @@ int main(void) shmem_team_t xteam, yzteam, yteam, zteam; shmem_team_split_2d(SHMEM_TEAM_WORLD, xdim, NULL, 0, &xteam, NULL, 0, &yzteam); - // No synchronization is needed between these split operations // yzteam is immediately ready to be used in collectives shmem_team_split_2d(yzteam, ydim, NULL, 0, &yteam, NULL, 0, &zteam); From 2d794944c2631cf8eb510194ab5c83c5af0dd864 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 16:23:34 -0500 Subject: [PATCH 275/319] Add individual deprecated functions to Annex E --- content/backmatter.tex | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 991494ec5..20aafdc5e 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -329,13 +329,30 @@ \section{Overview}\label{subsec:dep_overview} \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_add}} & 1.4 & Current & \hyperref[subsec:shmem_atomic_add]{\FUNC{shmem\_atomic\_add}} \\ \hline Entire \Fortran API & 1.4 & Current & (none) \\ \hline - - \color{Green} - All active-set-based collective routines & 1.5 & Current & Teams-based collective routines \\ \hline \color{Green} - \CorCpp: \FuncRef{shmem\_barrier} & 1.5 & Current & - \FUNC{shmem\_quiet}; \FUNC{shmem\_team\_sync} \\ \hline - + \CorCpp: \FuncRef{shmem\_barrier} & 1.5 & Current & + \hyperref[subsec:shmem_quiet]{\FUNC{shmem\_quiet}}; \hyperref[subsec:shmem_sync]{\FUNC{shmem\_sync}} \\ \hline + \color{Green} + \CorCpp: 
Active set based \FuncRef{shmem\_sync} & 1.5 & Current & + Team based \hyperref[subsec:shmem_sync]{\FUNC{shmem\_sync}} \\ \hline + \color{Green} + \CorCpp: \FuncRef{shmem\_broadcast[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_broadcast]{\FUNC{shmem\_broadcast}} \\ \hline + \color{Green} + \CorCpp: \FuncRef{shmem\_collect[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_collect]{\FUNC{shmem\_collect}} \\ \hline + \color{Green} + \CorCpp: \FuncRef{shmem\_fcollect[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_collect]{\FUNC{shmem\_fcollect}} \\ \hline + \color{Green} + \CorCpp: \FuncRef{shmem\_\TYPENAME\_OP\_to\_all} & 1.5 & Current & + \hyperref[subsec:shmem_collect]{\FUNC{shmem\_\TYPENAME\_OP\_reduce}} \\ \hline + \color{Green} + \CorCpp: \FuncRef{shmem\_alltoall[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_alltoall]{\FUNC{shmem\_alltoall}} \\ \hline + \color{Green} + \CorCpp: \FuncRef{shmem\_alltoalls[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_alltoalls]{\FUNC{shmem\_alltoalls}} \\ \hline \end{longtable} \end{center} From fee757e26af0927c9babf09367bc99de78248af8 Mon Sep 17 00:00:00 2001 From: Megan Grodowitz Date: Mon, 9 Sep 2019 16:47:14 -0500 Subject: [PATCH 276/319] Remove error handling section, move text to collective intro --- content/collective_intro.tex | 5 +++++ content/error_handling.tex | 13 ------------- content/shmem_alltoall.tex | 8 -------- content/shmem_broadcast.tex | 8 -------- content/shmem_collect.tex | 8 -------- content/shmem_reductions.tex | 8 -------- content/shmem_sync.tex | 9 --------- main_spec.tex | 5 ----- 8 files changed, 5 insertions(+), 59 deletions(-) delete mode 100644 content/error_handling.tex diff --git a/content/collective_intro.tex b/content/collective_intro.tex index dda5afb2b..943c7d56e 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -129,4 +129,9 @@ \subsubsection*{Team-implicit collectives} updates issued via the default context. 
This routine is equivalent to a call to \FUNC{shmem\_ctx\_quiet} on the default context followed by a call to \FUNC{shmem\_team\_sync} on the default team. + +\subsubsection*{Error codes returned from collectives} +\CorCpp routines that return an integer error code follow the convention that \CONST{0} indicates successful local completion of the operation. This is considered a best effort of the implementation to indicate that all required local operations have been performed correctly inside the routine and the internal \openshmem state on the calling \ac{PE} is consistent with the description of the routine and its arguments upon completion. Implementations may use an integer return value from a routine to define integer error codes specific to the implementation as long as those codes are not already explicitly defined for that routine by the \openshmem specification. + +Collective operations involving many \acp{PE} may return values indicating success while other \acp{PE} are still executing the collective operation. Return values indicating success of a collective routine on one \ac{PE} do not indicate that all \acp{PE} involved in the collective operation will return from the routine successfully. In the case where successful local completion of a collective implies the success of some global operation, such as team creation resulting in a valid team on all involved \acp{PE}, the implementation should not return \CONST{0} from the routine if the implied or stated global guarantees of the routine are not met. } diff --git a/content/error_handling.tex b/content/error_handling.tex deleted file mode 100644 index e6c30a922..000000000 --- a/content/error_handling.tex +++ /dev/null @@ -1,13 +0,0 @@ -In many cases, \openshmem routines will guarantee the correct completion of operations without any need for programs to check for error states, diagnose system problems, or retry operations. For example, there are no error codes returned for remote memory operations. 
The implementation is expected to internally attempt any feasible checking and recovery to best guarantee completion as specified. However, there are also cases where routines return error codes to allow programs to detect problems that may be correctable at the application layer, e.g. requests for system resources that cannot be fulfilled at runtime. - -\CorCpp routines that return an integer error code follow the convention that \CONST{0} indicates successful local completion of the operation. This is considered a best effort of the implementation to indicate that all required local operations have been performed correctly inside the routine and the internal \openshmem state on the calling \ac{PE} is consistent with the description of the routine and its arguments upon completion. Implementations may use an integer return value from a routine to define integer error codes specific to the implementation as long as those codes are not already explicitly defined for that routine. - -Because \openshmem defines asynchronous communication operations, errors may arise at any time as communications proceed. In these cases, the implementation might generate error messages or abort the application when errors occur. The \openshmem specification cannot define these types of errors, and leaves it to the implementation to determine how these types of errors should be handled. - -Collective operations involving many \acp{PE} may return values indicating success while other \acp{PE} are still executing the collective operation. By default, return values indicating success of a collective routine on one \ac{PE} do not indicate that all \acp{PE} involved in the collective operation will return from the routine successfully. If a collective routine provides any such global error checking behavior, it will be explicitly stated in the description of that routine. 
- -If some routine specified in this document does not explicity state resulting error behavior when a program violates the routine assumptions and requirements, then the behavior is undefined. See Annex \ref{sec:undefined} for more details on undefined behavior in \openshmem. - -\openshmem implementations are encouraged but not required to attempt to continue execution in the face of resource allocation errors, such as lack of network resources or memory resources. In these cases, if resource allocation fails inside a routine with an integer return code, library implementations should return some nonzero value, which may have implementation specific definition. If the routine has some other out parameter, such as pointer to a new memory allocation, routines may specify that the out parameter has some sentinel value to indicate failure to complete the operation. - - diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index ebf9d7527..5e21dabc6 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -154,14 +154,6 @@ } \apinotes{ -\newtext{% - There are no specifically defined error codes for these routines. - See Section~\ref{subsec:error_handling} for expected error checking and - return code behavior specific to implementations. For portable - error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID}. -} - This routine restores \VAR{pSync} to its original contents. Multiple calls to \openshmem\ routines that use the same \VAR{pSync} array do not require that \VAR{pSync} be reinitialized after the first call. diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index c49ddeb56..e5667ab5b 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -144,14 +144,6 @@ } \apinotes{ -\newtext{% - There are no specifically defined error codes for these routines. 
- See Section~\ref{subsec:error_handling} for expected error checking and - return code behavior specific to implementations. For portable - error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} - } - All \openshmem broadcast routines restore \VAR{pSync} to its original contents. Multiple calls to \openshmem routines that use the same \VAR{pSync} array do not require that \VAR{pSync} be reinitialized after the first call. diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 07636d243..beb7b2de7 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -140,14 +140,6 @@ } \apinotes{ -\newtext{% - There are no specifically defined error codes for these routines. - See Section~\ref{subsec:error_handling} for expected error checking and - return code behavior specific to implementations. For portable - error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID}. -} - All \openshmem collective routines reset the values in \VAR{pSync} before they return, so a particular \VAR{pSync} buffer need only be initialized the first time it is used. diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 3eea323a2..308758fc4 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -330,14 +330,6 @@ } \apinotes{ -\newtext{% - There are no specifically defined error codes for this routine. - See Section~\ref{subsec:error_handling} for expected error checking and - return code behavior specific to implementations. 
For portable - error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} - } - All \openshmem reduction routines reset the values in \VAR{pSync} before they return, so a particular \VAR{pSync} buffer need only be initialized the first time it is used. The user must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 7f8c25fd9..2b9585b2d 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -97,15 +97,6 @@ } \apinotes{ - -\newtext{% - There are no specifically defined error codes for sync operations. - See Section~\ref{subsec:error_handling} for expected error checking and - return code behavior specific to implementations. For portable - error checking and debugging behavior, programs should do their own checks - for invalid team handles or \LibConstRef{SHMEM\_TEAM\_INVALID} - } - If the \VAR{pSync} array is initialized at run time, another method of synchronization (e.g., \FUNC{shmem\_sync\_all}) must be used before the initial use of that \VAR{pSync} array by \FUNC{shmem\_sync}. 
diff --git a/main_spec.tex b/main_spec.tex index 935c3737a..551647f39 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -34,11 +34,6 @@ \section{Library Handles}\label{subsec:library_handles} \section{Environment Variables }\label{subsec:environment_variables} \input{content/environment_variables} -{\color{Green} -\section{Error Handling}\label{subsec:error_handling} -\input{content/error_handling} -} - \clearpage From b3bde10e5b14b62f7ebb26b5c0efa7ccce7902f6 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 30 Aug 2019 04:41:58 -0500 Subject: [PATCH 277/319] Add NBI AMO in the main memory order list Update fence and quiet API list Remove contents specifying the fetch in NBI as atomic Previously, we had statements that implies that the fetching operation into the local data object as an atomic operation in NBI AMOs, this is incorrect. Only the remote updates are atomic. Hence, fixing those references. Rollback singular/plural usage in NBI AMOs --- content/shmem_atomic_compare_swap_nbi.tex | 6 +++--- content/shmem_atomic_fetch_add_nbi.tex | 4 ++-- content/shmem_atomic_fetch_and_nbi.tex | 6 +++--- content/shmem_atomic_fetch_inc_nbi.tex | 4 ++-- content/shmem_atomic_fetch_nbi.tex | 6 +++--- content/shmem_atomic_fetch_or_nbi.tex | 9 ++++----- content/shmem_atomic_fetch_xor_nbi.tex | 6 +++--- content/shmem_atomic_swap_nbi.tex | 6 +++--- content/shmem_fence.tex | 23 +++++++++++++++-------- content/shmem_quiet.tex | 14 ++++++++------ main_spec.tex | 5 +++-- 11 files changed, 49 insertions(+), 40 deletions(-) diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex index 078b5868a..b988d73b2 100644 --- a/content/shmem_atomic_compare_swap_nbi.tex +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - The nonblocking atomic routine provide a method for performing an atomic + The nonblocking atomic routine provides a method for performing an atomic conditional swap on a remote 
data object. } @@ -39,12 +39,12 @@ \apidescription{ The nonblocking conditional swap routines conditionally update a \VAR{dest} - data object on the specified \ac{PE} and fetch the prior contents of the + data object on the specified \ac{PE} as an atomic operation and fetch the prior contents of the \VAR{dest} data object into the \VAR{fetch} local data object. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, prior contents of the \VAR{dest} data - object is atomically fetched into \VAR{fetch} local data object and the + object is fetched into \VAR{fetch} local data object and the contents of \VAR{value} is conditionally updated into \VAR{dest} on the remote \ac{PE}. } diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index a6cacaa71..727b7120f 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - The nonblocking atomic routine perform an atomic fetch-and-add operation on + The nonblocking atomic routine performs an atomic fetch-and-add operation on a remote data object. } @@ -36,7 +36,7 @@ \apidescription{ The nonblocking \FUNC{shmem\_atomic\_fetch\_add\_nbi} routines perform an - atomic fetch-and-add operation. An atomic fetch-and-add operation fetch + atomic fetch-and-add operation. An atomic fetch-and-add operation fetches the old \VAR{dest} and adds \VAR{value} to \VAR{dest} without the possibility of another atomic operation on the \VAR{dest} between the time of the fetch and the update. 
This routine returns after initiating the diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index 233ef1265..1fb4b871a 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic operation perform an atomic fetching bitwise AND + This nonblocking atomic operation performs an atomic fetching bitwise AND operation on a remote data object. } @@ -35,12 +35,12 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines perform an + The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines performs an atomic fetching bitwise AND on the remotely accessible data object pointed by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines perform a fetching bitwise AND on + \FUNC{shmem\_quiet}, these routines performs a fetching bitwise AND on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior contents of \VAR{dest} into the \VAR{fetch} local data object. } diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index 5429f3d1c..b116d1140 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic routine perform an atomic fetch-and-increment + This nonblocking atomic routine performs an atomic fetch-and-increment operation on a remote data object. } @@ -35,7 +35,7 @@ \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_inc\_nbi} routines perform an + The nonblocking \FUNC{shmem\_atomic\_fetch\_inc\_nbi} routines performs an atomic fetch-and-increment operation. This routine returns after initiating the operation. 
The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the \dest{} on diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex index 8ee4157e9..a66b564a4 100644 --- a/content/shmem_atomic_fetch_nbi.tex +++ b/content/shmem_atomic_fetch_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - The nonblocking atomic fetch routine provide a method for atomically + The nonblocking atomic fetch routine provides a method for atomically fetching the value of a remote data object. } @@ -34,12 +34,12 @@ \end{apiarguments} \apidescription{ - The nonblocking atomic fetch routines perform a nonblocking fetch of a + The nonblocking atomic fetch routines perform a nonblocking fetch of a value atomically from a remote data object. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, contents of the \source{} data object from \ac{PE} is - atomically fetched into \VAR{fetch} local data object. + fetched into \VAR{fetch} local data object. } \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index 352f3e00d..5b8251a52 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic operation perform an atomic fetching bitwise OR + This nonblocking atomic operation performs an atomic fetching bitwise OR operation on a remote data object. } @@ -35,15 +35,14 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines perform an + The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines perform an atomic fetching bitwise OR on the remotely accessible data object pointed by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. 
This routine returns after posting the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines perform a fetching bitwise OR on + \FUNC{shmem\_quiet}, these routines perform a fetching bitwise OR on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior - contents of \VAR{dest} into the \VAR{fetch} local data object as one atomic - operation. + contents of \VAR{dest} into the \VAR{fetch} local data object. } \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index 28e2f0bd7..cd7f1eb73 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic operation perform an atomic fetching bitwise XOR + This nonblocking atomic operation performs an atomic fetching bitwise XOR operation on a remote data object. } @@ -35,12 +35,12 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines perform an + The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines perform an atomic fetching bitwise XOR on the remotely accessible data object pointed by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines perform a fetching bitwise XOR on + \FUNC{shmem\_quiet}, these routines perform a fetching bitwise XOR on \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior contents of \VAR{dest} into the \VAR{fetch} local data object. 
} diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex index ac886e5db..61f62d2be 100644 --- a/content/shmem_atomic_swap_nbi.tex +++ b/content/shmem_atomic_swap_nbi.tex @@ -1,6 +1,6 @@ \color{Green} \apisummary{ - This nonblocking atomic operation perform an atomic swap to a remote data + This nonblocking atomic operation performs an atomic swap to a remote data object. } @@ -34,11 +34,11 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_swap\_nbi} routines perform an atomic + The nonblocking \FUNC{shmem\_atomic\_swap\_nbi} routines perform an atomic swap operation. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, it writes - \VAR{value} into \dest{} on \ac{PE} and fetch the prior contents of + \VAR{value} into \dest{} on \ac{PE} and fetches the prior contents of \dest{} into \VAR{fetch} local data object. } diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index 9f4da3f30..1ef6276aa 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -1,5 +1,6 @@ \apisummary{ - Assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} routines + Assures ordering of delivery of memory store, blocking \PUT{}, and \ac{AMO}, + as well as nonblocking \PUT{} and \ac{AMO} routines to symmetric data objects. } @@ -17,14 +18,18 @@ \end{apiarguments} \apidescription{ - This routine assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} - routines to symmetric data objects. All \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} + This routine assures ordering of delivery of memory store, blocking \PUT{}, + and \ac{AMO}, as well as nonblocking \PUT{}, and \ac{AMO} + routines to symmetric data objects. 
All memory store, blocking \PUT{}, and + \ac{AMO}, as well as nonblocking \PUT{}, and \ac{AMO} routines to symmetric data objects issued to a particular remote \ac{PE} on the given context prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any - subsequent \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} routines to symmetric data + subsequent memory store, blocking \PUT{}, and \ac{AMO}, as well as nonblocking + \PUT{}, and \ac{AMO} routines to symmetric data objects to the same \ac{PE}. \FUNC{shmem\_fence} guarantees order of delivery, - not completion. It does not guarantee order of delivery of nonblocking \GET{} routines. + not completion. It does not guarantee order of delivery of nonblocking + \GET{} or values fetched by nonblocking \ac{AMO} routines. If \VAR{ctx} has the value \CONST{SHMEM\_CTX\_INVALID}, no operation is performed. } @@ -42,12 +47,14 @@ ordering of its memory accesses. There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} - guarantees completion of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} routines to + guarantees completion of memory store, blocking \PUT{}, and \ac{AMO}, as + well as nonblocking \PUT{}, and \ac{AMO} routines to symmetric data objects which makes the updates visible to all other \acp{PE}. - The \FUNC{shmem\_quiet} routine should be called if completion of \PUT{}, - \ac{AMO}, memory store, and nonblocking \PUT{} and \ac{AMO} routines to symmetric data objects is desired + The \FUNC{shmem\_quiet} routine should be called if completion of memory + store, blocking \PUT{}, and \ac{AMO}, as well as nonblocking \PUT{}, and + \ac{AMO} routines to symmetric data objects is desired when multiple remote \acp{PE} are involved. 
In an \openshmem program with multithreaded \acp{PE}, it is the diff --git a/content/shmem_quiet.tex b/content/shmem_quiet.tex index 866256716..27e91e120 100644 --- a/content/shmem_quiet.tex +++ b/content/shmem_quiet.tex @@ -1,6 +1,7 @@ \apisummary{ - Waits for completion of all outstanding \PUT{}, \ac{AMO}, memory store, - and nonblocking \PUT{}, \GET{}, and \ac{AMO} routines to symmetric data + Waits for completion of all outstanding memory store, blocking + \PUT{}, and \ac{AMO}, as well as nonblocking + \PUT{}, \GET{}, and \ac{AMO} routines to symmetric data objects issued by a \ac{PE}. } @@ -35,15 +36,16 @@ \apinotes{ \FUNC{shmem\_quiet} is most useful as a way of ensuring completion of - several \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{}, - \GET{} and \ac{AMO} routines to symmetric data objects initiated by the calling + several memory store, blocking \PUT{}, and \ac{AMO}, as well as nonblocking \PUT{}, + \GET{}, and \ac{AMO} routines to symmetric data objects initiated by the calling \ac{PE}. For example, one might use \FUNC{shmem\_quiet} to await delivery of a block of data before issuing another \PUT{} or nonblocking \PUT{} routine, which sets a completion flag on another \ac{PE}. \FUNC{shmem\_quiet} is not usually needed if \FUNC{shmem\_barrier\_all} or \FUNC{shmem\_barrier} are called. The barrier - routines wait for the completion of outstanding writes (\PUT{}, \ac{AMO}, - memory stores, and nonblocking \PUT{}, \GET{} and \ac{AMO} routines) to + routines wait for the completion of outstanding writes (memory store, + blocking \PUT{}, and \ac{AMO}, + as well as nonblocking \PUT{}, \GET{} and \ac{AMO} routines) to symmetric data objects on all \acp{PE}. 
In an \openshmem program with multithreaded \acp{PE}, it is the diff --git a/main_spec.tex b/main_spec.tex index 4c144d298..91aa42192 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -337,8 +337,9 @@ \subsubsection{\textbf{SHMEM\_TEST\_SOME}}\label{subsec:shmem_test_some} \subsection{Memory Ordering Routines}\label{subsec:memory_order} The following section discusses \openshmem \acp{API} that provide mechanisms to -ensure ordering and/or delivery of \OPR{Put}, \ac{AMO}, memory store, -and non-blocking \PUT{} and \GET{} routines to symmetric data objects. +ensure ordering and/or delivery of memory store, blocking \OPR{Put}, and \ac{AMO}, +as well as non-blocking \PUT{}, \GET{}, and \ac{AMO} routines to symmetric data +objects. \subsubsection{\textbf{SHMEM\_FENCE}}\label{subsec:shmem_fence} \input{content/shmem_fence.tex} From 0fba24caf548b190ad74fead658c5ed63452917e Mon Sep 17 00:00:00 2001 From: James Dinan Date: Wed, 25 Sep 2019 14:23:47 -0400 Subject: [PATCH 278/319] Fix example intro affected by Fortran removal Signed-off-by: James Dinan --- content/shmem_ptr.tex | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index cd5e726c3..2eefc9a5c 100644 --- a/content/shmem_ptr.tex +++ b/content/shmem_ptr.tex @@ -39,7 +39,9 @@ \begin{apiexamples} \apicexample - {This is the equivalent program written in \Cstd[11]:} + {In the following \Cstd[11] example, \ac{PE} 0 uses the \FUNC{shmem\_ptr} + routine to query a pointer and directly access the \VAR{dest} array on + \ac{PE} 1:} {./example_code/shmem_ptr_example.c} {} From ff7b03b633ac7c621a8be0bab69b8bf10c45e451 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 26 Sep 2019 10:19:52 -0400 Subject: [PATCH 279/319] Resolve merge conflict --- content/shmem_wait_until.tex | 10 ---------- example_code/shmem_wait3_example.c | 7 ------- 2 files changed, 17 deletions(-) delete mode 100644 example_code/shmem_wait3_example.c diff --git a/content/shmem_wait_until.tex 
b/content/shmem_wait_until.tex index bf1b1a8e2..420920bbc 100644 --- a/content/shmem_wait_until.tex +++ b/content/shmem_wait_until.tex @@ -80,14 +80,4 @@ value. } -\begin{apiexamples} - -\apicexample -{The following \CorCpp{} routine waits until the value in \VAR{ivar} is set to -be less than zero by a transfer from a remote PE:} -{./example_code/shmem_wait3_example.c} -{} - -\end{apiexamples} - \end{apidefinition} diff --git a/example_code/shmem_wait3_example.c b/example_code/shmem_wait3_example.c deleted file mode 100644 index 2b0e85dfd..000000000 --- a/example_code/shmem_wait3_example.c +++ /dev/null @@ -1,7 +0,0 @@ -#include - -int ivar; - -void wait_on_ivar(void) { - shmem_int_wait_until(&ivar, SHMEM_CMP_LT, 0); -} From 2bd032d1fbddc2e4922676026b7ccf3708a3c31b Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 26 Sep 2019 10:40:24 -0400 Subject: [PATCH 280/319] Merge changes from #267 --- content/shmem_test_all_vector.tex | 9 ++++++++- content/shmem_test_any_vector.tex | 9 ++++++++- content/shmem_test_some_vector.tex | 9 ++++++++- content/shmem_wait_until_all_vector.tex | 16 ++++++++-------- content/shmem_wait_until_any_vector.tex | 16 ++++++++-------- content/shmem_wait_until_some_vector.tex | 15 +++++++-------- 6 files changed, 47 insertions(+), 27 deletions(-) diff --git a/content/shmem_test_all_vector.tex b/content/shmem_test_all_vector.tex index 58036bb5e..4eaa0c5e8 100644 --- a/content/shmem_test_all_vector.tex +++ b/content/shmem_test_all_vector.tex @@ -36,7 +36,10 @@ \apidescription{ The \FUNC{shmem\_test\_all\newtext{\_vector}} routine indicates whether all entries in the test set specified by \VAR{ivars} and \VAR{status} have - satisfied the test condition at the calling \ac{PE}. This routine does not + satisfied the test condition at the calling \ac{PE}. The \VAR{ivars} + objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a + thread located within the calling \ac{PE} or within another \ac{PE}. 
+ This routine does not block and returns zero if not all entries in \VAR{ivars} satisfied the test condition\newtext{s}. \newtext{This routine compares each element of the \VAR{ivars} array in the test set with each respective value in @@ -53,6 +56,10 @@ ignored and all elements in \VAR{ivars} are included in the test set. The \VAR{ivars}, \VAR{indices}, and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_test\_all\_vector} does not + return 1 before the update of the memory indicated by \VAR{ivars} is fully + complete. } \apireturnvalues{ diff --git a/content/shmem_test_any_vector.tex b/content/shmem_test_any_vector.tex index 6528ac83a..b77eb68d4 100644 --- a/content/shmem_test_any_vector.tex +++ b/content/shmem_test_any_vector.tex @@ -36,7 +36,10 @@ \apidescription{ The \FUNC{shmem\_test\_any\newtext{\_vector}} routine indicates whether any entry in the test set specified by \VAR{ivars} and \VAR{status} has - satisfied the test condition at the calling \ac{PE}. This routine does not + satisfied the test condition at the calling \ac{PE}. The \VAR{ivars} + objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a + thread located within the calling \ac{PE} or within another \ac{PE}. + This routine does not block and returns \CONST{SIZE\_MAX} if no entries in \VAR{ivars} satisfied the test condition. \newtext{This routine compares each element of the \VAR{ivars} array in the test set with each respective value in @@ -55,6 +58,10 @@ \CONST{SIZE\_MAX}. If \VAR{status} is a null pointer, it is ignored and all elements in \VAR{ivars} are included in the test set. The \VAR{ivars} and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_test\_any\_vector} does not + return an index before the update of the memory indicated by the + corresponding \VAR{ivars} element is fully complete. 
} \apireturnvalues{ diff --git a/content/shmem_test_some_vector.tex b/content/shmem_test_some_vector.tex index 2913f19a8..275ad2cd2 100644 --- a/content/shmem_test_some_vector.tex +++ b/content/shmem_test_some_vector.tex @@ -38,7 +38,10 @@ \apidescription{ The \FUNC{shmem\_test\_some\newtext{\_vector}} routine indicates whether at least one entry in the test set specified by \VAR{ivars} and \VAR{status} - satisfies the test condition at the calling \ac{PE}. This routine does not + satisfies the test condition at the calling \ac{PE}. The \VAR{ivars} + objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a + thread located within the calling \ac{PE} or within another \ac{PE}. + This routine does not block and returns zero if no entries in \VAR{ivars} satisfied the test condition. \newtext{This routine compares each element of the \VAR{ivars} array in the test set with each respective value in \VAR{cmp\_values} @@ -67,6 +70,10 @@ empty and this routine returns 0. If \VAR{status} is a null pointer, it is ignored and all elements in \VAR{ivars} are included in the test set. The \VAR{ivars}, \VAR{indices}, and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_test\_some\_vector} does not + return indices before the updates of the memory indicated by the + corresponding \VAR{ivars} elements are fully complete. } \apireturnvalues{ diff --git a/content/shmem_wait_until_all_vector.tex b/content/shmem_wait_until_all_vector.tex index 6e5116a86..7cc88047e 100644 --- a/content/shmem_wait_until_all_vector.tex +++ b/content/shmem_wait_until_all_vector.tex @@ -35,7 +35,10 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_all\_vector} routine waits until all entries in the wait set specified by \VAR{ivars} and \VAR{status} have satisfied - the wait condition\newtext{s} at the calling \ac{PE}. If \VAR{nelems} is + the wait condition\newtext{s} at the calling \ac{PE}. 
The \VAR{ivars} + objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a + thread located within the calling \ac{PE} or within another \ac{PE}. + If \VAR{nelems} is 0, the wait set is empty and this routine returns immediately. \newtext{This routine compares each element of the \VAR{ivars} array in the wait set with each respective value in \VAR{cmp\_values} according to the @@ -53,6 +56,10 @@ immediately. If \VAR{status} is a null pointer, it is ignored and all elements in \VAR{ivars} are included in the wait set. The \VAR{ivars} and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_wait\_until\_all\_vector} + does not return before the update of the memory indicated by \VAR{ivars} is + fully complete. } @@ -64,12 +71,5 @@ None. } -\apiimpnotes{ - Implementations must ensure that \FUNC{shmem\_wait\_until\_all\_vector} does not - return before the update of the memory indicated by \VAR{ivars} is fully - complete. Partial updates to the memory must not cause - \FUNC{shmem\_wait\_until\_all\_vector} to return. -} - \end{apidefinition} diff --git a/content/shmem_wait_until_any_vector.tex b/content/shmem_wait_until_any_vector.tex index c35c0ac27..fbdc024e5 100644 --- a/content/shmem_wait_until_any_vector.tex +++ b/content/shmem_wait_until_any_vector.tex @@ -36,7 +36,10 @@ \apidescription{ The \FUNC{shmem\_wait\_until\_any\_vector} routine waits until any one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies - the wait condition at the calling \ac{PE}. \newtext{This routine compares + the wait condition at the calling \ac{PE}. The \VAR{ivars} objects at the + calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located + within the calling \ac{PE} or within another \ac{PE}. 
+ \newtext{This routine compares each element of the \VAR{ivars} array in the wait set with each respective value in \VAR{cmp\_values} according to the comparison operator \VAR{cmp} at the calling \ac{PE}}. The order in which these elements are waited upon @@ -53,6 +56,10 @@ \CONST{SIZE\_MAX}. If \VAR{status} is a null pointer, it is ignored and all elements in \VAR{ivars} are included in the wait set. The \VAR{ivars} and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_wait\_until\_any\_vector} + does not return before the update of the memory indicated by \VAR{ivars} is + fully complete. } \apireturnvalues{ @@ -65,13 +72,6 @@ None. } -\apiimpnotes{ - Implementations must ensure that \FUNC{shmem\_wait\_until\_any\newtext{\_vector}} does not - return before the update of the memory indicated by the completed index of \VAR{ivars} is fully - executed. Partial updates to the memory must not cause - \FUNC{shmem\_wait\_until\_any\newtext{\_vector}} to return. -} - \color{ForestGreen}{ \begin{apiexamples} \apicexample diff --git a/content/shmem_wait_until_some_vector.tex b/content/shmem_wait_until_some_vector.tex index f92ca082a..6bf3c6fea 100644 --- a/content/shmem_wait_until_some_vector.tex +++ b/content/shmem_wait_until_some_vector.tex @@ -39,6 +39,9 @@ The \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait condition at the calling \ac{PE}. + The \VAR{ivars} objects at the calling \ac{PE} may be updated by an + \ac{AMO} performed by a thread located within the calling \ac{PE} or within + another \ac{PE}. \newtext{This routine compares each element of the \VAR{ivars} array in the wait set with each respective value in \VAR{cmp\_values} according to the comparison operator \VAR{cmp} at the calling \ac{PE}}. This routine tests @@ -69,6 +72,10 @@ and all elements in \VAR{ivars} are included in the wait set. 
The \VAR{ivars}, \VAR{indices}, and \VAR{status} arrays must not overlap in memory. + + Implementations must ensure that \FUNC{shmem\_wait\_until\_some\_vector} + does not return before the update of the memory indicated by \VAR{ivars} is + fully complete. } @@ -82,13 +89,5 @@ None. } -\apiimpnotes{ - Implementations must ensure that - \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} does not return before - the update of the memory indicated by the completed indices of \VAR{ivars} - is fully executed. Partial updates to the memory must not cause - \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} to return. -} - \end{apidefinition} From 5c0d48ecbbe6afe88d58682e9bfb595005ef9a78 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 26 Sep 2019 11:45:26 -0400 Subject: [PATCH 281/319] Remove change markers --- content/shmem_test_all_vector.tex | 24 ++++++++++---------- content/shmem_test_any_vector.tex | 24 ++++++++++---------- content/shmem_test_some_vector.tex | 28 ++++++++++++------------ content/shmem_wait_until_all_vector.tex | 23 +++++++++---------- content/shmem_wait_until_any_vector.tex | 20 ++++++++--------- content/shmem_wait_until_some_vector.tex | 28 ++++++++++++------------ 6 files changed, 72 insertions(+), 75 deletions(-) diff --git a/content/shmem_test_all_vector.tex b/content/shmem_test_all_vector.tex index 4eaa0c5e8..83ecd389e 100644 --- a/content/shmem_test_all_vector.tex +++ b/content/shmem_test_all_vector.tex @@ -1,18 +1,19 @@ \apisummary{ - Indicate whether all variables within an array of variables on the local \ac{PE} meet \oldtext{a} \newtext{the} specified test condition\newtext{s}. + Indicate whether all variables within an array of variables on the local + \ac{PE} meet the specified test conditions. 
} \begin{apidefinition} \begin{C11synopsis} -int @\FuncDecl{shmem\_test\_all\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, +int @\FuncDecl{shmem\_test\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the point-to-point synchronization types specified by Table \ref{p2psynctypes}. \begin{Csynopsis} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_all\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the point-to-point synchronization types and has a @@ -26,25 +27,24 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the test set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - compares elements of \VAR{ivars} with \newtext{elements of} - \VAR{cmp\_value\newtext{s}}.} - \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} - containing values to be compared with the respective objects in \VAR{ivars}.}} + compares elements of \VAR{ivars} with elements of \VAR{cmp\_values}.} + \apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.} \end{apiarguments} \apidescription{ - The \FUNC{shmem\_test\_all\newtext{\_vector}} routine indicates whether all + The \FUNC{shmem\_test\_all\_vector} routine indicates whether all entries in the test set specified by \VAR{ivars} and \VAR{status} have satisfied the test condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. 
This routine does not block and returns zero if not all entries in \VAR{ivars} satisfied the test - condition\newtext{s}. \newtext{This routine compares each element of the + conditions. This routine compares each element of the \VAR{ivars} array in the test set with each respective value in \VAR{cmp\_values} according to the comparison operator \VAR{cmp} at the - calling \ac{PE}}. If \VAR{nelems} is 0, the test set is empty and this + calling \ac{PE}. If \VAR{nelems} is 0, the test set is empty and this routine returns 1. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element @@ -63,7 +63,9 @@ } \apireturnvalues{ - \FUNC{shmem\_test\_all\newtext{\_vector}} returns 1 if all variables in \VAR{ivars} satisfy the test condition\newtext{s} or if \VAR{nelems} is 0, otherwise this routine returns 0. + \FUNC{shmem\_test\_all\_vector} returns 1 if all variables in \VAR{ivars} + satisfy the test conditions or if \VAR{nelems} is 0, otherwise this routine + returns 0. } \apinotes{ diff --git a/content/shmem_test_any_vector.tex b/content/shmem_test_any_vector.tex index b77eb68d4..f27ea1caa 100644 --- a/content/shmem_test_any_vector.tex +++ b/content/shmem_test_any_vector.tex @@ -1,18 +1,19 @@ \apisummary{ - Indicate whether any one variable within an array of variables on the local \ac{PE} meets \oldtext{a} \newtext{its} specified test condition. + Indicate whether any one variable within an array of variables on the local + \ac{PE} meets its specified test condition. } \begin{apidefinition} \begin{C11synopsis} -size_t @\FuncDecl{shmem\_test\_any\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, +size_t @\FuncDecl{shmem\_test\_any\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the point-to-point synchronization types specified by Table \ref{p2psynctypes}. 
\begin{Csynopsis} -size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_any\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, +size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_any\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the point-to-point synchronization types and has a @@ -26,28 +27,27 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the test set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - compares elements of \VAR{ivars} with \newtext{elements of} - \VAR{cmp\_value\newtext{s}}.} - \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} - containing values to be compared with the respective objects in \VAR{ivars}.}} + compares elements of \VAR{ivars} with elements of \VAR{cmp\_values}.} + \apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.} \end{apiarguments} \apidescription{ - The \FUNC{shmem\_test\_any\newtext{\_vector}} routine indicates whether any + The \FUNC{shmem\_test\_any\_vector} routine indicates whether any entry in the test set specified by \VAR{ivars} and \VAR{status} has satisfied the test condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. This routine does not block and returns \CONST{SIZE\_MAX} if no entries in \VAR{ivars} satisfied - the test condition. \newtext{This routine compares each element of the + the test condition. This routine compares each element of the \VAR{ivars} array in the test set with each respective value in \VAR{cmp\_values} according to the comparison operator \VAR{cmp} at the - calling \ac{PE}}. 
The order in which these elements are tested is + calling \ac{PE}. The order in which these elements are tested is unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies the test condition, a series of calls to - \FUNC{shmem\_test\_any\newtext{\_vector}} must eventually return $i$. + \FUNC{shmem\_test\_any\_vector} must eventually return $i$. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates @@ -65,7 +65,7 @@ } \apireturnvalues{ - \FUNC{shmem\_test\_any\newtext{\_vector}} returns the index of an element in the \VAR{ivars} + \FUNC{shmem\_test\_any\_vector} returns the index of an element in the \VAR{ivars} array that satisfies the test condition. If the test set is empty or no conditions in the test set are satisfied, this routine returns \CONST{SIZE\_MAX}. } diff --git a/content/shmem_test_some_vector.tex b/content/shmem_test_some_vector.tex index 275ad2cd2..fdf5c26bb 100644 --- a/content/shmem_test_some_vector.tex +++ b/content/shmem_test_some_vector.tex @@ -1,18 +1,19 @@ \apisummary{ - Indicate whether at least one variable within an array of variables on the local \ac{PE} meets \oldtext{a} \newtext{its} specified test condition. + Indicate whether at least one variable within an array of variables on the + local \ac{PE} meets its specified test condition. } \begin{apidefinition} \begin{C11synopsis} -size_t @\FuncDecl{shmem\_test\_some\newtext{\_vector}}@(TYPE *ivars, size_t nelems, size_t *indices, const int *status, +size_t @\FuncDecl{shmem\_test\_some\_vector}@(TYPE *ivars, size_t nelems, size_t *indices, const int *status, int cmp, TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the point-to-point synchronization types specified by Table \ref{p2psynctypes}. 
\begin{Csynopsis} -size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_some\newtext{\_vector}}@(TYPE *ivars, size_t nelems, size_t *indices, +size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test\_some\_vector}@(TYPE *ivars, size_t nelems, size_t *indices, const int *status, int cmp, TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the point-to-point synchronization types and has a @@ -28,38 +29,37 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the test set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - compares elements of \VAR{ivars} with \newtext{elements of} - \VAR{cmp\_value\newtext{s}}.} - \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} - containing values to be compared with the respective objects in \VAR{ivars}.}} + compares elements of \VAR{ivars} with elements of \VAR{cmp\_values}.} + \apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.} \end{apiarguments} \apidescription{ - The \FUNC{shmem\_test\_some\newtext{\_vector}} routine indicates whether at + The \FUNC{shmem\_test\_some\_vector} routine indicates whether at least one entry in the test set specified by \VAR{ivars} and \VAR{status} satisfies the test condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. This routine does not block and returns zero if no entries in \VAR{ivars} satisfied the test - condition. \newtext{This routine compares each element of the \VAR{ivars} + condition. This routine compares each element of the \VAR{ivars} array in the test set with each respective value in \VAR{cmp\_values} - according to the comparison operator \VAR{cmp} at the calling \ac{PE}}. 
+ according to the comparison operator \VAR{cmp} at the calling \ac{PE}. This routine tests all elements of \VAR{ivars} in the test set at least once, and the order in which the elements are tested is unspecified. Upon return, the \VAR{indices} array contains the indices of the elements in the test set that satisfied the test condition during the call to - \FUNC{shmem\_test\_some\newtext{\_vector}}. The return value of - \FUNC{shmem\_test\_some\newtext{\_vector}} is equal to the total number of + \FUNC{shmem\_test\_some\_vector}. The return value of + \FUNC{shmem\_test\_some\_vector} is equal to the total number of these satisfied elements. If the return value is $N$, then the first $N$ elements of the \VAR{indices} array contain those unique indices that satisfied the test condition. These first $N$ elements of \VAR{indices} may be unordered with respect to the corresponding indices of \VAR{ivars}. The array pointed to by \VAR{indices} must be at least \VAR{nelems} long. If an entry $i$ in \VAR{ivars} within the test set satisfies the test - condition, a series of calls to \FUNC{shmem\_test\_some\newtext{\_vector}} + condition, a series of calls to \FUNC{shmem\_test\_some\_vector} must eventually include $i$ in the \VAR{indices} array. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element @@ -77,7 +77,7 @@ } \apireturnvalues{ - \FUNC{shmem\_test\_some\newtext{\_vector}} returns the number of indices returned in + \FUNC{shmem\_test\_some\_vector} returns the number of indices returned in the \VAR{indices} array. If the test set is empty, this routine returns 0. } diff --git a/content/shmem_wait_until_all_vector.tex b/content/shmem_wait_until_all_vector.tex index 7cc88047e..2e250d2d7 100644 --- a/content/shmem_wait_until_all_vector.tex +++ b/content/shmem_wait_until_all_vector.tex @@ -1,18 +1,19 @@ \apisummary{ - Wait on an array of variables on the local \ac{PE} until all variables meet the specified wait condition\newtext{s}. 
+ Wait on an array of variables on the local \ac{PE} until all variables meet + the specified wait conditions. } \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_wait\_until\_all\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, +void @\FuncDecl{shmem\_wait\_until\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the point-to-point synchronization types specified by Table \ref{p2psynctypes}. \begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_all\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_all\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the point-to-point synchronization types and has a corresponding \TYPENAME{} specified by Table~\ref{p2psynctypes}. @@ -25,27 +26,23 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the wait set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - compares elements of \VAR{ivars} with \newtext{elements of} - \VAR{cmp\_value\newtext{s}}.} - \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} - containing values to be compared with the respective objects in \VAR{ivars}.}} + compares elements of \VAR{ivars} with elements of \VAR{cmp\_values}.} + \apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.} \end{apiarguments} \apidescription{ The \FUNC{shmem\_wait\_until\_all\_vector} routine waits until all entries in the wait set specified by \VAR{ivars} and \VAR{status} have satisfied - the wait condition\newtext{s} at the calling \ac{PE}. 
The \VAR{ivars} + the wait conditions at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. If \VAR{nelems} is 0, the wait set is empty and this routine returns immediately. - \newtext{This routine compares each element of the \VAR{ivars} array in the + This routine compares each element of the \VAR{ivars} array in the wait set with each respective value in \VAR{cmp\_values} according to the - comparison operator \VAR{cmp} at the calling \ac{PE}}. \oldtext{This - routine is semantically similar to \FUNC{shmem\_wait\_until} in - Section~\ref{subsec:shmem_wait_until}, but adds support for point-to-point - synchronization involving an array of symmetric data objects.} + comparison operator \VAR{cmp} at the calling \ac{PE}. The optional \VAR{status} is a mask array of length \VAR{nelems} where each element corresponds to the respective element in \VAR{ivars} and indicates diff --git a/content/shmem_wait_until_any_vector.tex b/content/shmem_wait_until_any_vector.tex index fbdc024e5..b7768f1b6 100644 --- a/content/shmem_wait_until_any_vector.tex +++ b/content/shmem_wait_until_any_vector.tex @@ -1,18 +1,19 @@ \apisummary{ - Wait on an array of variables on the local \ac{PE} until any one variable meets \oldtext{the} \newtext{its} specified wait condition. + Wait on an array of variables on the local \ac{PE} until any one variable + meets its specified wait condition. } \begin{apidefinition} \begin{C11synopsis} -size_t @\FuncDecl{shmem\_wait\_until\_any\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, int cmp, +size_t @\FuncDecl{shmem\_wait\_until\_any\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the point-to-point synchronization types specified by Table \ref{p2psynctypes}. 
\begin{Csynopsis} -size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_any\newtext{\_vector}}@(TYPE *ivars, size_t nelems, const int *status, +size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_any\_vector}@(TYPE *ivars, size_t nelems, const int *status, int cmp, TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the point-to-point synchronization types and has a @@ -26,10 +27,9 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the wait set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - compares elements of \VAR{ivars} with \newtext{elements of} - \VAR{cmp\_value\newtext{s}}.} - \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} - containing values to be compared with the respective objects in \VAR{ivars}.}} + compares elements of \VAR{ivars} with elements of \VAR{cmp\_values}.} + \apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.} \end{apiarguments} @@ -39,10 +39,10 @@ the wait condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. - \newtext{This routine compares + This routine compares each element of the \VAR{ivars} array in the wait set with each respective value in \VAR{cmp\_values} according to the comparison operator \VAR{cmp} - at the calling \ac{PE}}. The order in which these elements are waited upon + at the calling \ac{PE}. The order in which these elements are waited upon is unspecified. If an entry $i$ in \VAR{ivars} within the wait set satisfies the wait condition, a series of calls to \FUNC{shmem\_wait\_until\_any\_vector} must eventually return $i$. @@ -72,7 +72,6 @@ None.
} -\color{ForestGreen}{ \begin{apiexamples} \apicexample {The following \Cstd[11] example demonstrates the use of @@ -81,7 +80,6 @@ {./example_code/shmem_wait_until_any_vector.c} {} \end{apiexamples} -} \end{apidefinition} diff --git a/content/shmem_wait_until_some_vector.tex b/content/shmem_wait_until_some_vector.tex index 6bf3c6fea..30dc169f3 100644 --- a/content/shmem_wait_until_some_vector.tex +++ b/content/shmem_wait_until_some_vector.tex @@ -1,18 +1,19 @@ \apisummary{ - Wait on an array of variables on the local \ac{PE} until at least one variable meets \oldtext{the} \newtext{its} specified wait condition. + Wait on an array of variables on the local \ac{PE} until at least one + variable meets its specified wait condition. } \begin{apidefinition} \begin{C11synopsis} -size_t @\FuncDecl{shmem\_wait\_until\_some\newtext{\_vector}}@(TYPE *ivars, size_t nelems, size_t *indices, +size_t @\FuncDecl{shmem\_wait\_until\_some\_vector}@(TYPE *ivars, size_t nelems, size_t *indices, const int *status, int cmp, TYPE *cmp_values); \end{C11synopsis} where \TYPE{} is one of the point-to-point synchronization types specified by Table \ref{p2psynctypes}.
\begin{Csynopsis} -size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_some\newtext{\_vector}}@(TYPE *ivars, size_t nelems, size_t *indices, +size_t @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until\_some\_vector}@(TYPE *ivars, size_t nelems, size_t *indices, const int *status, int cmp, TYPE *cmp_values); \end{Csynopsis} where \TYPE{} is one of the point-to-point synchronization types and has a @@ -28,30 +29,29 @@ \apiargument{IN}{status}{An optional mask array of length \VAR{nelems} that indicates which elements in \VAR{ivars} are excluded from the wait set.} \apiargument{IN}{cmp}{A comparison operator from Table~\ref{p2p-consts} that - compares elements of \VAR{ivars} with \newtext{elements of} - \VAR{cmp\_value\newtext{s}}.} - \newtext{\apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} - containing values to be compared with the respective objects in \VAR{ivars}.}} + compares elements of \VAR{ivars} with elements of \VAR{cmp\_values}.} + \apiargument{IN}{cmp\_values}{An array of length \VAR{nelems} + containing values to be compared with the respective objects in \VAR{ivars}.} \end{apiarguments} \apidescription{ - The \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} routine waits until + The \FUNC{shmem\_wait\_until\_some\_vector} routine waits until at least one entry in the wait set specified by \VAR{ivars} and \VAR{status} satisfies the wait condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. - \newtext{This routine compares each element of the \VAR{ivars} array in the + This routine compares each element of the \VAR{ivars} array in the wait set with each respective value in \VAR{cmp\_values} according to the - comparison operator \VAR{cmp} at the calling \ac{PE}}. This routine tests + comparison operator \VAR{cmp} at the calling \ac{PE}. 
This routine tests all elements of \VAR{ivars} in the wait set at least once, and the order in which the elements are waited upon is unspecified. Upon return, the \VAR{indices} array contains the indices of at least one element in the wait set that satisfied the wait condition during the call - to \FUNC{shmem\_wait\_until\_some\newtext{\_vector}}. The return value of - \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} is equal to the total + to \FUNC{shmem\_wait\_until\_some\_vector}. The return value of + \FUNC{shmem\_wait\_until\_some\_vector} is equal to the total number of these satisfied elements. For a given return value $N$, the first $N$ elements of the \VAR{indices} array contain those unique indices that satisfied the wait condition. These first $N$ elements of @@ -59,7 +59,7 @@ \VAR{ivars}. The array pointed to by \VAR{indices} must be at least \VAR{nelems} long. If an entry $i$ in \VAR{ivars} within the wait set satisfies the wait condition, a series of calls to - \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} must eventually include + \FUNC{shmem\_wait\_until\_some\_vector} must eventually include $i$ in the \VAR{indices} array. The optional \VAR{status} is a mask array of length \VAR{nelems} where each @@ -80,7 +80,7 @@ \apireturnvalues{ - \FUNC{shmem\_wait\_until\_some\newtext{\_vector}} returns the number of + \FUNC{shmem\_wait\_until\_some\_vector} returns the number of indices returned in the \VAR{indices} array. If the wait set is empty, this routine returns 0. 
} From 16b2ac09d6c2613f27d6dfc633bef29c53c5b0d7 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 26 Sep 2019 12:02:32 -0400 Subject: [PATCH 282/319] Remove additional change markers --- content/shmem_test_all.tex | 4 ++-- content/shmem_test_any.tex | 4 ++-- content/shmem_test_some.tex | 4 ++-- content/shmem_wait_until_all.tex | 4 ++-- content/shmem_wait_until_any.tex | 4 ++-- content/shmem_wait_until_some.tex | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/content/shmem_test_all.tex b/content/shmem_test_all.tex index 98e0e8928..ad3bd3dc3 100644 --- a/content/shmem_test_all.tex +++ b/content/shmem_test_all.tex @@ -39,9 +39,9 @@ the calling \ac{PE} or within another \ac{PE}. This routine does not block and returns zero if not all entries in \VAR{ivars} satisfied the test condition. - \newtext{This routine compares each element of the \VAR{ivars} array in the + This routine compares each element of the \VAR{ivars} array in the test set with the value \VAR{cmp\_value} according to the comparison - operator \VAR{cmp} at the calling \ac{PE}}. + operator \VAR{cmp} at the calling \ac{PE}. If \VAR{nelems} is 0, the test set is empty and this routine returns 1. diff --git a/content/shmem_test_any.tex b/content/shmem_test_any.tex index 2b7814719..eee615887 100644 --- a/content/shmem_test_any.tex +++ b/content/shmem_test_any.tex @@ -40,9 +40,9 @@ the calling \ac{PE} or within another \ac{PE}. This routine does not block and returns \CONST{SIZE\_MAX} if no entries in \VAR{ivars} satisfied the test condition. - \newtext{This routine compares each element of the \VAR{ivars} array in the + This routine compares each element of the \VAR{ivars} array in the test set with the value \VAR{cmp\_value} according to the comparison - operator \VAR{cmp} at the calling \ac{PE}}. + operator \VAR{cmp} at the calling \ac{PE}. The order in which these elements are tested is unspecified. 
If an entry $i$ in \VAR{ivars} within the test set satisfies the test condition, a series of calls to \FUNC{shmem\_test\_any} must diff --git a/content/shmem_test_some.tex b/content/shmem_test_some.tex index 539f37165..750ecc574 100644 --- a/content/shmem_test_some.tex +++ b/content/shmem_test_some.tex @@ -42,9 +42,9 @@ within the calling \ac{PE} or within another \ac{PE}. This routine does not block and returns zero if no entries in \VAR{ivars} satisfied the test condition. - \newtext{This routine compares each element of the \VAR{ivars} array in the + This routine compares each element of the \VAR{ivars} array in the test set with the value \VAR{cmp\_value} according to the comparison - operator \VAR{cmp} at the calling \ac{PE}}. + operator \VAR{cmp} at the calling \ac{PE}. This routine tests all elements of \VAR{ivars} in the test set at least once, and the order in which the elements are tested is unspecified. If an entry $i$ in \VAR{ivars} within the test set satisfies diff --git a/content/shmem_wait_until_all.tex b/content/shmem_wait_until_all.tex index 8cfe765c1..35a92ac91 100644 --- a/content/shmem_wait_until_all.tex +++ b/content/shmem_wait_until_all.tex @@ -38,9 +38,9 @@ updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. If \VAR{nelems} is 0, the wait set is empty and this routine returns immediately. - \newtext{This routine compares each element of the \VAR{ivars} array in the + This routine compares each element of the \VAR{ivars} array in the wait set with the value \VAR{cmp\_value} according to the comparison - operator \VAR{cmp} at the calling \ac{PE}}. + operator \VAR{cmp} at the calling \ac{PE}. 
This routine is semantically similar to \FUNC{shmem\_wait\_until} in Section~\ref{subsec:shmem_wait_until}, but adds support for point-to-point synchronization involving an array of diff --git a/content/shmem_wait_until_any.tex b/content/shmem_wait_until_any.tex index f57906cd7..0872808c0 100644 --- a/content/shmem_wait_until_any.tex +++ b/content/shmem_wait_until_any.tex @@ -38,9 +38,9 @@ condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. - \newtext{This routine compares each element of the \VAR{ivars} array in the + This routine compares each element of the \VAR{ivars} array in the wait set with the value \VAR{cmp\_value} according to the comparison - operator \VAR{cmp} at the calling \ac{PE}}. + operator \VAR{cmp} at the calling \ac{PE}. The order in which these elements are waited upon is unspecified. If an entry $i$ in \VAR{ivars} within the wait set satisfies the wait condition, a series of calls to diff --git a/content/shmem_wait_until_some.tex b/content/shmem_wait_until_some.tex index dc7a23d1e..d21709649 100644 --- a/content/shmem_wait_until_some.tex +++ b/content/shmem_wait_until_some.tex @@ -40,9 +40,9 @@ wait condition at the calling \ac{PE}. The \VAR{ivars} objects at the calling \ac{PE} may be updated by an \ac{AMO} performed by a thread located within the calling \ac{PE} or within another \ac{PE}. - \newtext{This routine compares each element of the \VAR{ivars} array in the + This routine compares each element of the \VAR{ivars} array in the wait set with the value \VAR{cmp\_value} according to the comparison - operator \VAR{cmp} at the calling \ac{PE}}. + operator \VAR{cmp} at the calling \ac{PE}. This routine tests all elements of \VAR{ivars} in the wait set at least once, and the order in which the elements are waited upon is unspecified. 
From be815263dad5d5ff326015c43f53a9b8fd0525fb Mon Sep 17 00:00:00 2001 From: "David M. Ozog" Date: Wed, 2 Oct 2019 11:34:21 -0400 Subject: [PATCH 283/319] Add changelog entry for the wait/test "vector" API Signed-off-by: David M. Ozog --- content/backmatter.tex | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index bcd6ef5dc..c619e1f65 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -503,7 +503,7 @@ \section{Version 1.5} environment variables. \\ See Section \ref{subsec:environment_variables}. % -\item Added a new multiple-element point-to-point synchronization API with +\item Added support for a multiple-element point-to-point synchronization API with the functions: \FUNC{shmem\_wait\_until\_all}, \FUNC{shmem\_wait\_until\_any}, \FUNC{shmem\_wait\_until\_some}, \FUNC{shmem\_test\_all}, \FUNC{shmem\_test\_any}, and \FUNC{shmem\_test\_some}. @@ -512,6 +512,17 @@ \section{Version 1.5} \ref{subsec:shmem_test_all}, \ref{subsec:shmem_test_any}, and \ref{subsec:shmem_test_some}. % +\item Added support for vectorized comparison values in the multiple-element + point-to-point synchronization API with the functions: + \FUNC{shmem\_wait\_until\_all\_vector}, \FUNC{shmem\_wait\_until\_any\_vector}, + \FUNC{shmem\_wait\_until\_some\_vector}, \\ + \FUNC{shmem\_test\_all\_vector}, \FUNC{shmem\_test\_any\_vector}, and + \FUNC{shmem\_test\_some\_vector}. + \\See Sections \ref{subsec:shmem_wait_until_all_vector}, + \ref{subsec:shmem_wait_until_any_vector}, \ref{subsec:shmem_wait_until_some_vector}, + \ref{subsec:shmem_test_all_vector}, \ref{subsec:shmem_test_any_vector}, and + \ref{subsec:shmem_test_some_vector}. +% \item Added \openshmem profiling interface. \\ See Section~\ref{sec:openshmem_profiling_interface}. 
% From b3b20d42f3bab7a8a5a81077ba31719d52e5fba0 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 3 Oct 2019 17:37:23 -0400 Subject: [PATCH 284/319] Remove change highlighting --- content/backmatter.tex | 43 +++++++----------- content/collective_intro.tex | 31 +++++-------- content/library_constants.tex | 4 -- content/library_handles.tex | 4 -- content/rma_intro.tex | 3 -- content/shmem_alltoall.tex | 51 +++++++++------------ content/shmem_alltoalls.tex | 37 ++++++--------- content/shmem_barrier.tex | 2 - content/shmem_barrier_all.tex | 14 +++--- content/shmem_broadcast.tex | 40 +++++++---------- content/shmem_calloc.tex | 2 +- content/shmem_collect.tex | 21 ++------- content/shmem_ctx_create.tex | 4 +- content/shmem_ctx_destroy.tex | 6 +-- content/shmem_finalize.tex | 4 +- content/shmem_malloc.tex | 2 +- content/shmem_reductions.tex | 85 +++++++++++------------------------ content/shmem_sync.tex | 30 +++---------- content/shmem_sync_all.tex | 24 ++-------- main_spec.tex | 8 ---- 20 files changed, 131 insertions(+), 284 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 20aafdc5e..a3c24a845 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -329,30 +329,22 @@ \section{Overview}\label{subsec:dep_overview} \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_add}} & 1.4 & Current & \hyperref[subsec:shmem_atomic_add]{\FUNC{shmem\_atomic\_add}} \\ \hline Entire \Fortran API & 1.4 & Current & (none) \\ \hline - \color{Green} - \CorCpp: \FuncRef{shmem\_barrier} & 1.5 & Current & - \hyperref[subsec:shmem_quiet]{\FUNC{shmem\_quiet}}; \hyperref[subsec:shmem_sync]{\FUNC{shmem\_sync}} \\ \hline - \color{Green} - \CorCpp: Active set based \FuncRef{shmem\_sync} & 1.5 & Current & - Team based \hyperref[subsec:shmem_sync]{\FUNC{shmem\_sync}} \\ \hline - \color{Green} - \CorCpp: \FuncRef{shmem\_broadcast[32,64]} & 1.5 & Current & - \hyperref[subsec:shmem_broadcast]{\FUNC{shmem\_broadcast}} \\ \hline - \color{Green} - \CorCpp: 
\FuncRef{shmem\_collect[32,64]} & 1.5 & Current & - \hyperref[subsec:shmem_collect]{\FUNC{shmem\_collect}} \\ \hline - \color{Green} - \CorCpp: \FuncRef{shmem\_fcollect[32,64]} & 1.5 & Current & - \hyperref[subsec:shmem_collect]{\FUNC{shmem\_fcollect}} \\ \hline - \color{Green} - \CorCpp: \FuncRef{shmem\_\TYPENAME\_OP\_to\_all} & 1.5 & Current & - \hyperref[subsec:shmem_collect]{\FUNC{shmem\_\TYPENAME\_OP\_reduce}} \\ \hline - \color{Green} - \CorCpp: \FuncRef{shmem\_alltoall[32,64]} & 1.5 & Current & - \hyperref[subsec:shmem_alltoall]{\FUNC{shmem\_alltoall}} \\ \hline - \color{Green} - \CorCpp: \FuncRef{shmem\_alltoalls[32,64]} & 1.5 & Current & - \hyperref[subsec:shmem_alltoalls]{\FUNC{shmem\_alltoalls}} \\ \hline + \CorCpp: \FuncRef{shmem\_barrier} & 1.5 & Current & + \hyperref[subsec:shmem_quiet]{\FUNC{shmem\_quiet}}; \hyperref[subsec:shmem_sync]{\FUNC{shmem\_sync}} \\ \hline + \CorCpp: Active set based \FuncRef{shmem\_sync} & 1.5 & Current & + Team based \hyperref[subsec:shmem_sync]{\FUNC{shmem\_sync}} \\ \hline + \CorCpp: \FuncRef{shmem\_broadcast[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_broadcast]{\FUNC{shmem\_broadcast}} \\ \hline + \CorCpp: \FuncRef{shmem\_collect[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_collect]{\FUNC{shmem\_collect}} \\ \hline + \CorCpp: \FuncRef{shmem\_fcollect[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_collect]{\FUNC{shmem\_fcollect}} \\ \hline + \CorCpp: \FuncRef{shmem\_\TYPENAME\_OP\_to\_all} & 1.5 & Current & + \hyperref[subsec:shmem_collect]{\FUNC{shmem\_\TYPENAME\_OP\_reduce}} \\ \hline + \CorCpp: \FuncRef{shmem\_alltoall[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_alltoall]{\FUNC{shmem\_alltoall}} \\ \hline + \CorCpp: \FuncRef{shmem\_alltoalls[32,64]} & 1.5 & Current & + \hyperref[subsec:shmem_alltoalls]{\FUNC{shmem\_alltoalls}} \\ \hline \end{longtable} \end{center} @@ -495,7 +487,6 @@ \subsection{\textit{Fortran} API}\label{subsec:deprecate-fortran} %% WARNING: Is \footnote{Formally, 
\Fortran[2003] is known as ISO/IEC~1539-1:2004(E).}. -{\color{Green} \subsection{Active-set-based collective routines} With the addition of \openshmem teams, the previous methods for performing collective operations has been superseded by a more readable, flexible method for @@ -945,4 +936,4 @@ \section{Version 1.1} % \end{itemize} -} %end of setlength command that was started in frontmatter.tex +%end of setlength command that was started in frontmatter.tex diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 943c7d56e..26db0b5f6 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -1,7 +1,6 @@ -\emph{Collective routines} are defined as \newtext{coordinated} communication or synchronization -operations \oldtext{on} \newtext{performed by} a group of \acp{PE} \oldtext{called an active set}. +\emph{Collective routines} are defined as coordinated communication or synchronization +operations performed by a group of \acp{PE}. -{\color{Green} \openshmem provides three types of collective routines: \begin{enumerate} @@ -58,19 +57,15 @@ \subsubsection*{Team-based collectives} In addition, all team creation functions are collective operations. In addition to the ordering and thread safety requirements described here, there are additional synchronization requirements on team creation operations. See Section~\ref{subsec:team} for more details. -} \begin{DeprecateBlock} -\subsubsection*{\newtext{Active-set-based collectives}} +\subsubsection*{Active-set-based collectives} -The \newtext{active-set-based} collective routines require all \acp{PE} +The active-set-based collective routines require all \acp{PE} in the active set to simultaneously call the routine. A \ac{PE} that is not in the active set calling the collective -routine results in undefined behavior. \oldtext{All collective routines have an -active set as an input parameter except \FUNC{shmem\_barrier\_all} and -\FUNC{shmem\_sync\_all}. 
Both \FUNC{shmem\_barrier\_all} and -\FUNC{shmem\_sync\_all} must be called by all \acp{PE} of the \openshmem program.} +routine results in undefined behavior. The active set is defined by the arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size}. \VAR{PE\_start} specifies the starting \ac{PE} number and @@ -82,29 +77,27 @@ \subsubsection*{\newtext{Active-set-based collectives}} number, that is $0 \le PE\_start + (PE\_size - 1) * 2^{logPE\_stride} < npes$. -All \acp{PE} participating in the \newtext{active-set-based} collective routine must provide the same +All \acp{PE} participating in the active-set-based collective routine must provide the same values for these arguments. If any of these requirements are not met, the behavior is undefined. -Another argument important to \newtext{active-set-based} collective routines is \VAR{pSync}, which is a -symmetric work array. All \acp{PE} participating in an \newtext{active-set-based} collective must pass the -same \VAR{pSync} array. On completion of \newtext{such} a collective call, the \VAR{pSync} is +Another argument important to active-set-based collective routines is \VAR{pSync}, which is a +symmetric work array. All \acp{PE} participating in an active-set-based collective must pass the +same \VAR{pSync} array. On completion of such a collective call, the \VAR{pSync} is restored to its original contents. The user is permitted to reuse a \VAR{pSync} array if all previous collective routines using the \VAR{pSync} array have been completed by all participating \acp{PE}. One can use a synchronization -collective routine such as \FUNC{shmem\_barrier} to ensure completion of previous \newtext{active-set-based} collective +collective routine such as \FUNC{shmem\_barrier} to ensure completion of previous active-set-based collective routines. 
The \FUNC{shmem\_barrier} and \FUNC{shmem\_sync} routines allow the same \VAR{pSync} array to be used on consecutive calls as long as the \acp{PE} in the active set do not change. All collective routines defined in the Specification are blocking. The -collective routines return on completion. The \newtext{active-set-based} collective +collective routines return on completion. The active-set-based collective routines defined in the \openshmem Specification are: \begin{itemize} -\item \oldtext{\FUNC{shmem\_barrier\_all}} \item \FUNC{shmem\_barrier} -\item \oldtext{\FUNC{shmem\_sync\_all}} \item \FUNC{shmem\_sync} \item \FUNC{shmem\_broadcast\{32, 64\}} \item \FUNC{shmem\_collect\{32, 64\}} @@ -116,7 +109,6 @@ \subsubsection*{\newtext{Active-set-based collectives}} \end{DeprecateBlock} -{\color{Green} \subsubsection*{Team-implicit collectives} @@ -134,4 +126,3 @@ \subsubsection*{Error codes returned from collectives} \CorCpp routines that return an integer error code follow the convention that \CONST{0} indicates successful local completion of the operation. This is considered a best effort of the implementation to indicate that all required local operations have been performed correctly inside the routine and the internal \openshmem state on the calling \ac{PE} is consistent with the description of the routine and its arguments upon completion. Implementations may use an integer return value from a routine to define integer error codes specific to the implementation as long as those codes are not already explicitly defined for that routine by the \openshmem specification. Collective operations involving many \acp{PE} may return values indicating success while other \acp{PE} are still executing the collective operation. Return values indicating success of a collective routine on one \ac{PE} do not indicate that all \acp{PE} involved in the collective operation will return from the routine successfully. 
In the case where successful local completion of a collective implies the success of some global operation, such as team creation resulting in a valid team on all involved \acp{PE}, the implementation should not return \CONST{0} from the routine if the implied or stated global guarantees of the routine are not met. -} diff --git a/content/library_constants.tex b/content/library_constants.tex index 86d550d2d..2747d0799 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -39,9 +39,7 @@ See Section~\ref{subsec:thread_support} for more detail about its use. \tabularnewline \hline %% -\color{Green} \LibConstDecl{SHMEM\_TEAM\_NUM\_CONTEXTS} & -\color{Green} The bitwise flag which specifies that a team creation routine should use the \VAR{num\_contexts} member of the provided \CTYPE{shmem\_team\_config\_t} configuration parameter as a request. @@ -49,9 +47,7 @@ \ref{subsec:shmem_team_split_strided} for more detail about its use. \tabularnewline \hline %% -\color{Green} \LibConstDecl{SHMEM\_TEAM\_INVALID} & -\color{Green} A value corresponding to an invalid team. This value can be used to initialize or update team handles to indicate that they do not reference a valid team. diff --git a/content/library_handles.tex b/content/library_handles.tex index 0899e61e5..2f674b3c3 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -13,9 +13,7 @@ \tabularnewline \hline \endhead %% -\color{Green} \LibHandleDecl{SHMEM\_TEAM\_WORLD} & -\color{Green} Handle of type \CTYPE{shmem\_team\_t} that corresponds to the default team of all \acp{PE} in the \openshmem program. All point-to-point communication operations and collective synchronizations that do not specify a team @@ -23,9 +21,7 @@ See Section~\ref{subsec:team} for more detail about its use. 
\tabularnewline \hline %% -\color{Green} \LibHandleDecl{SHMEM\_TEAM\_SHARED} & -\color{Green} Handle of type \CTYPE{shmem\_team\_t} that corresponds to a team of \acp{PE} that share a memory domain. When this handle is used by some \ac{PE}, it will refer to the team of all \acp{PE} that would return a non-null diff --git a/content/rma_intro.tex b/content/rma_intro.tex index 0ba8223a4..3c912e1f9 100644 --- a/content/rma_intro.tex +++ b/content/rma_intro.tex @@ -16,8 +16,6 @@ routine, \GET{}, the origin \ac{PE} provides the \dest{} data object and the destination \ac{PE} provides the \source{} data object. -{ -\color{Green} The destination \ac{PE} is specified as an integer representing the \ac{PE} number. This \ac{PE} number is relative to the team associated with the communication context being using for the operation. If no context argument is passed to the routine, @@ -25,7 +23,6 @@ the \ac{PE} number is relative to the default team. If the \ac{PE} number passed to the routine is invalid, being negative or greater than or equal to the size of the \openshmem team, then the behavior is undefined. -} Where appropriate compiler support is available, \openshmem provides type-generic one-sided communication interfaces via \Cstd[11] generic selection diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 5e21dabc6..b5dcfa61b 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -1,20 +1,17 @@ \apisummary{ - shmem\_alltoall is a collective routine where each \ac{PE} exchanges a fixed amount of data with all other \acp{PE} \oldtext{in the active set} \newtext{participating in the collective}. + shmem\_alltoall is a collective routine where each \ac{PE} exchanges a fixed amount of data with all other \acp{PE} participating in the collective. 
} \begin{apidefinition} %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_alltoall}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. -} \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_alltoall}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{CsynopsisCol} @@ -23,7 +20,6 @@ \begin{CsynopsisCol} int @\FuncDecl{shmem\_alltoallmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); \end{CsynopsisCol} -} \begin{DeprecateBlock} \begin{CsynopsisCol} @@ -34,9 +30,7 @@ \begin{apiarguments} -\newtext{% \apiargument{IN}{team}{A valid \openshmem team handle to a team.} -} \apiargument{OUT}{dest}{A symmetric data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the @@ -66,7 +60,6 @@ \end{apiarguments} \apidescription{ -{\color{Green} The \FUNC{shmem\_alltoall} routines are collective routines. Each \ac{PE} participating in the operation exchanges \VAR{nelems} data elements with all other \acp{PE} participating in the operation. @@ -77,47 +70,45 @@ \item 8 bits for \FUNC{shmem\_alltoallmem} \item \FUNC{sizeof}(\TYPE{}) for alltoall routines taking typed \VAR{source} and \VAR{dest} \end{itemize} -} The data being sent and received are stored in a contiguous symmetric data object. The total size of each \acp{PE} \VAR{source} object and \VAR{dest} object is \VAR{nelems} times the size of - an element \oldtext{(32 bits or 64 bits) times \VAR{PE\_size}} - \newtext{times \VAR{N}, where \VAR{N} equals the number of \acp{PE} participating - in the operation}. - The \VAR{source} object contains oldtext{\VAR{PE\_size}} \VAR{N} blocks of data + an element + times \VAR{N}, where \VAR{N} equals the number of \acp{PE} participating + in the operation. 
+ The \VAR{source} object contains \VAR{N} blocks of data (where the size of each block is defined by \VAR{nelems}) and each block of data is sent to a different \ac{PE}. - \newtext{The same \dest{} and \source{} - arrays, and same value for \newtext{nelems} - must be passed by all \acp{PE} that participate in the collective.} + The same \dest{} and \source{} + arrays, and the same value for \VAR{nelems} + must be passed by all \acp{PE} that participate in the collective. - Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} \oldtext{in the active set} - \newtext{participating in the operation} and a \ac{PE} - \VAR{j} that is the \lth \ac{PE} \oldtext{in the active set} - \newtext{participating in the operation}, + Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} + participating in the operation and a \ac{PE} + \VAR{j} that is the \lth \ac{PE} + participating in the operation, \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to the \kth block of the \VAR{dest} object of \ac{PE} \VAR{j}. -{\color{Green} Team-based alltoall routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in the collective. Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. -} - As with all \oldtext{\openshmem} \newtext{active-set-based} collective routines, + + As with all active-set-based collective routines, this routine assumes that only \acp{PE} in the active set call the routine. - If a \ac{PE} not in the active set calls an \oldtext{\openshmem} - \newtext{active-set-based} collective routine, + If a \ac{PE} not in the active set calls an + active-set-based collective routine, the behavior is undefined. - The values of arguments \oldtext{\VAR{nelems},} \VAR{PE\_start}, \VAR{logPE\_stride}, + The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \acp{PE} in the active set.
The same - \oldtext{\VAR{dest} and \VAR{source} data objects, and the same} \VAR{pSync} work + \VAR{pSync} work array must be passed to all \acp{PE} in the active set. Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, @@ -125,7 +116,7 @@ \begin{itemize} \item The \VAR{dest} data object on all \acp{PE} in the active set is ready to accept the \FUNC{shmem\_alltoall} data. - \item \newtext{For active-set-based routines}, the \VAR{pSync} array + \item For active-set-based routines, the \VAR{pSync} array on all \acp{PE} in the active set is not still in use from a prior call to a \FUNC{shmem\_alltoall} routine. \end{itemize} @@ -136,7 +127,7 @@ \begin{itemize} \item Its \VAR{dest} symmetric data object is completely updated and the data has been copied out of the \VAR{source} data object. - \item \newtext{For active-set-based routines, } + \item For active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} } @@ -150,7 +141,7 @@ \apitablerow{shmem\_alltoall32}{\CONST{32} bits aligned.} \apireturnvalues{ - \newtext{Zero on successful local completion. Nonzero otherwise.} + Zero on successful local completion. Nonzero otherwise. } \apinotes{ diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index 792a2a1b7..53e034f7f 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -1,20 +1,17 @@ \apisummary{ - shmem\_alltoalls is a collective routine where each \ac{PE} exchanges a fixed amount of strided data with all other \acp{PE} \oldtext{in the active set} \newtext{participating in the collective}. + shmem\_alltoalls is a collective routine where each \ac{PE} exchanges a fixed amount of strided data with all other \acp{PE} participating in the collective. 
} \begin{apidefinition} %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_alltoalls}@(shmem_team_t team, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. -} \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_alltoalls}@(shmem_team_t team, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); \end{CsynopsisCol} @@ -23,7 +20,6 @@ \begin{CsynopsisCol} int @\FuncDecl{shmem\_alltoallsmem}@(shmem_team_t team, void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems); \end{CsynopsisCol} -} \begin{DeprecateBlock} \begin{CsynopsisCol} @@ -34,9 +30,7 @@ \begin{apiarguments} -\newtext{% \apiargument{IN}{team}{A valid \openshmem team handle.} -} \apiargument{OUT}{dest}{A symmetric data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the @@ -75,31 +69,29 @@ \apidescription{ The \FUNC{shmem\_alltoalls} routines are collective routines. - \newtext{These routines are equivalent in functionality to the corresponding + These routines are equivalent in functionality to the corresponding \FUNC{shmem\_alltoall} routines except that they add explicit stride values for accessing the source and destination data arrays, whereas the array - access in \FUNC{shmem\_alltoall} is always with a stride of \CONST{1}.} + access in \FUNC{shmem\_alltoall} is always with a stride of \CONST{1}. - Each \ac{PE} \oldtext{in the active set} \newtext{participating in the operation} - exchanges \VAR{nelems} strided data elements \oldtext{of size - 32 bits (for \FUNC{shmem\_alltoalls32}) or 64 bits (for \FUNC{shmem\_alltoalls64})} - with all other \acp{PE} \oldtext{in the set} \newtext{participating in the operation}. 
+ Each \ac{PE} participating in the operation + exchanges \VAR{nelems} strided data elements + with all other \acp{PE} participating in the operation. Both strides, \VAR{dst} and \VAR{sst}, must be greater than or equal to \CONST{1}. - \newtext{The same \dest{} and \source{} arrays and same values for values of + The same \dest{} and \source{} arrays and the same values of the arguments \VAR{dst}, \VAR{sst}, \VAR{nelems} must be passed by all \acp{PE} - that participate in the collective.} + that participate in the collective. - Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} \oldtext{in the active set} - \newtext{participating in the operation} and a \ac{PE} - \VAR{j} that is the \lth \ac{PE} \oldtext{in the active set} - \newtext{participating in the operation} + Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} + participating in the operation and a \ac{PE} + \VAR{j} that is the \lth \ac{PE} + participating in the operation, \ac{PE} \VAR{i} sends the \VAR{sst}*\lth block of the \VAR{source} data object to the \VAR{dst}*\kth block of the \VAR{dest} data object on \ac{PE} \VAR{j}. -{\color{Green} See the description of \FUNC{shmem\_alltoall} in Section~\ref{subsec:shmem_alltoall} for: \begin{itemize} @@ -108,17 +100,16 @@ \item The pre- and post-conditions for symmetric objects. \item Typing constraints for \dest{} and \source{} data objects. \end{itemize} -} } \apireturnvalues{ - \newtext{Zero on successful local completion. Nonzero otherwise.} + Zero on successful local completion. Nonzero otherwise. } \apinotes{ - \newtext{See notes for \FUNC{shmem\_alltoall} in Section~\ref{subsec:shmem_alltoall}}. + See notes for \FUNC{shmem\_alltoall} in Section~\ref{subsec:shmem_alltoall}.
} \begin{apiexamples} diff --git a/content/shmem_barrier.tex b/content/shmem_barrier.tex index 61de1efee..40405feb6 100644 --- a/content/shmem_barrier.tex +++ b/content/shmem_barrier.tex @@ -55,7 +55,6 @@ } \apinotes{ - \newtext{ As of \openshmem[1.5], \FUNC{shmem\_barrier} has been deprecated. No team-based barrier is provided by \openshmem, as a team may have any number of communication contexts associated with the team. @@ -63,7 +62,6 @@ \FUNC{shmem\_ctx\_quiet} on the desired communication context, followed by a call to \FUNC{shmem\_team\_sync} on the desired team. - } If the \VAR{pSync} array is initialized at the run time, all \acp{PE} must be synchronized before the first call to \FUNC{shmem\_barrier} diff --git a/content/shmem_barrier_all.tex b/content/shmem_barrier_all.tex index f75dd7abc..45e698ca6 100644 --- a/content/shmem_barrier_all.tex +++ b/content/shmem_barrier_all.tex @@ -17,12 +17,12 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_barrier\_all} routine \oldtext{registers the arrival of a \ac{PE} at - a barrier. Barriers are} \newtext{is} a mechanism for synchronizing all \acp{PE} \newtext{in the default team} at - once. This routine blocks the \newtext{calling} \ac{PE} until all \acp{PE} have called + The \FUNC{shmem\_barrier\_all} routine + is a mechanism for synchronizing all \acp{PE} in the default team at + once. This routine blocks the calling \ac{PE} until all \acp{PE} have called \FUNC{shmem\_barrier\_all}. In a multithreaded \openshmem - program, only the calling thread is blocked\newtext{, however, - it may not be called concurrently by multiple threads in the same \ac{PE}}. + program, only the calling thread is blocked, however, + it may not be called concurrently by multiple threads in the same \ac{PE}. 
Prior to synchronizing with other \acp{PE}, \FUNC{shmem\_barrier\_all} ensures completion of all previously issued memory stores and remote memory @@ -37,17 +37,13 @@ } \apinotes{ - \newtext{% The \FUNC{shmem\_barrier\_all} routine is equivalent to calling \FUNC{shmem\_ctx\_quiet} on the default context followed by calling \FUNC{shmem\_team\_sync} on the default team. - } - \oldtext{% The \FUNC{shmem\_barrier\_all} routine can be used to portably ensure that memory access operations observe remote updates in the order enforced by initiator \acp{PE}. - } Calls to \FUNC{shmem\_ctx\_quiet} can be performed prior to calling the barrier routine to ensure completion of operations issued on diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index e5667ab5b..6686d2507 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -6,17 +6,14 @@ \begin{apidefinition} %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_broadcast}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems, int PE_root); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. -} %% C/C++ \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_broadcast}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems, int PE_root); \end{CsynopsisCol} @@ -25,7 +22,7 @@ \begin{CsynopsisCol} int @\FuncDecl{shmem\_broadcastmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems, int PE_root); \end{CsynopsisCol} -} + \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); @@ -35,19 +32,17 @@ \begin{apiarguments} -\newtext{% \apiargument{IN}{team}{The team over which to perform the operation.}% -} -\apiargument{OUT}{dest}{A symmetric data object. 
\newtext{See the table below in this description - for allowable types.}} +\apiargument{OUT}{dest}{A symmetric data object. See the table below in this description + for allowable types.} \apiargument{IN}{source}{A symmetric data object that can be of any data type that is permissible for the \dest{} argument.} \apiargument{IN}{nelems}{The number of elements in \source. nelems must be of type \VAR{size\_t} in \Cstd. When using \Fortran, it must be a default integer value.} \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the \newtext{team or} active set, from which the data is copied. + the team or active set, from which the data is copied. \VAR{PE\_root} must be of type \CTYPE{int}. When using \Fortran, it must be a default integer value.} @@ -71,16 +66,14 @@ \end{apiarguments} \apidescription{ - \openshmem broadcast routines are collective routines \newtext{over an active set or - existing \openshmem team}. They copy data object + \openshmem broadcast routines are collective routines over an active set or + existing \openshmem team. They copy data object \source{} on the processor specified by \VAR{PE\_root} and store the values at - \dest{} on the other \acp{PE} \newtext{particpating in the collective operation.} - \oldtext{specified by the triplet \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size}.} %% + \dest{} on the other \acp{PE} participating in the collective operation. The data is not copied to the \dest{} area on the root \ac{PE}. - {\color{Green} The same \dest{} and \source{} data objects and the same value of \VAR{PE\_root} must be - passed by all \acp{PE} particpating in the collective operation. + passed by all \acp{PE} participating in the collective operation. Team-based broadcast routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in the operation. 
@@ -94,25 +87,24 @@ Active-set-based broadcast routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. - } - As with all \newtext{active-set-based} \oldtext{\openshmem} collective routines, + As with all active-set-based collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the - active set calls an \newtext{active-set-based} \oldtext{\openshmem} + active set calls an active-set-based collective routine, the behavior is undefined. The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. - \newtext{The value of \VAR{PE\_root} must be between \CONST{0} and \VAR{PE\_size}.} + The value of \VAR{PE\_root} must be greater than or equal to \CONST{0} and less than \VAR{PE\_size}. The same \VAR{pSync} work array must be passed by all \acp{PE} in the active set. Before any \ac{PE} calls a broadcast routine, the following conditions must be ensured: \begin{itemize} - \item The \dest{} array on all \acp{PE} \newtext{participating in the broadcast} - \oldtext{in the active set} %% + \item The \dest{} array on all \acp{PE} participating in the broadcast + %% is ready to accept the broadcast data. - \item \newtext{If using active-set-based routines,} the + \item If using active-set-based routines, the \VAR{pSync} array on all \acp{PE} in the active set is not still in use from a prior call to a collective \openshmem routine. @@ -125,7 +117,7 @@ \item If the current \ac{PE} is not the root \ac{PE}, the \dest{} data object is updated. \item The \source{} data object may be safely reused. - \item \newtext{If using active-set-based routines,} + \item If using active-set-based routines, the values in the \VAR{pSync} array are restored to the original values.
\end{itemize} } @@ -140,7 +132,7 @@ \apitablerow{shmem\_broadcast32}{No \CorCpp{} structures are allowed.} \apireturnvalues{ - \newtext{Zero on successful local completion. Nonzero otherwise.} + Zero on successful local completion. Nonzero otherwise. } \apinotes{ diff --git a/content/shmem_calloc.tex b/content/shmem_calloc.tex index 235e552d5..8eeb90be2 100644 --- a/content/shmem_calloc.tex +++ b/content/shmem_calloc.tex @@ -16,7 +16,7 @@ \apidescription{ The \FUNC{shmem\_calloc} routine is a collective operation - \newtext{on the default team} that allocates a + on the default team that allocates a region of remotely-accessible memory for an array of \VAR{count} objects of \VAR{size} bytes each and returns a pointer to the lowest byte address of the allocated symmetric diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 6c031a259..5f645c368 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -1,22 +1,19 @@ \apisummary{ Concatenates blocks of data from multiple \acp{PE} to an array in every - \ac{PE} \newtext{participating in the collective routine}. + \ac{PE} participating in the collective routine. } \begin{apidefinition} %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_collect}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); int @\FuncDecl{shmem\_fcollect}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
-} \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_collect}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_fcollect}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); @@ -28,7 +25,6 @@ int @\FuncDecl{shmem\_fcollectmem}@(shmem_team_t team, void *dest, const void *source, size_t nelems); \end{CsynopsisCol} -} \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_collect32}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); @@ -40,13 +36,11 @@ \begin{apiarguments} -\newtext{% \apiargument{IN}{team}{A valid \openshmem team handle.} -} \apiargument{OUT}{dest}{A symmetric array large enough to accept the concatenation of the \source{} arrays on all participating \acp{PE}. - \newtext{See table below in this description for allowable data types.}} + See table below in this description for allowable data types.} \apiargument{IN}{source}{A symmetric data object that can be of any type permissible for the \dest{} argument.} \apiargument{IN}{nelems}{The number of elements in the \source{} array. \VAR{nelems} @@ -71,7 +65,6 @@ \end{apiarguments} \apidescription{ -{\color{Green} \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} data items from the \source{} array into the @@ -89,13 +82,11 @@ The collected result is written to the \dest{} array for all \acp{PE} that participate in the operation. The same \dest{} and \source{} arrays must be passed by all \acp{PE} that participate in the operation. -} The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to vary from \ac{PE} to \ac{PE}. -{\color{Green} Team-based collect routines operate over all \acp{PE} in the provided team argument. 
All \acp{PE} in the provided team must participate in the operation. @@ -105,23 +96,20 @@ each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in the active set calls this collective routine, the behavior is undefined. -} The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. The same - \oldtext{\dest{} and \source{} arrays and the same} %% \VAR{pSync} work array must be passed by all \acp{PE} in the active set. Upon return from a collective routine, the following are true for the local \ac{PE}: \begin{itemize} \item The \dest{} array is updated and the \source{} array may be safely reused. - \item \newtext{For active-set-based collective routines,} the values in the \VAR{pSync} array are + \item For active-set-based collective routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} } -{\color{Green} \apidesctable{ The \dest{} and \source{} data objects must conform to certain typing constraints, which are as follows: @@ -133,10 +121,9 @@ \apitablerow{\FUNC{shmem\_collect32}, \FUNC{shmem\_fcollect32}}% {Any noncharacter type that has an element size of \CONST{32} bits. No \Fortran derived types nor \CorCpp{} structures are allowed.} -} \apireturnvalues{ - \newtext{Zero on successful local completion. Nonzero otherwise.} + Zero on successful local completion. Nonzero otherwise. } \apinotes{ diff --git a/content/shmem_ctx_create.tex b/content/shmem_ctx_create.tex index 62ebf120c..a6298a921 100644 --- a/content/shmem_ctx_create.tex +++ b/content/shmem_ctx_create.tex @@ -1,5 +1,5 @@ \apisummary{ - Create a communication context \newtext{locally}. + Create a communication context locally. } \begin{apidefinition} @@ -28,14 +28,12 @@ in a correct state. The creation call can be reattempted with different options or after additional resources become available.
- \newtext{ A newly created communication context has a fixed association with the default team. All \openshmem routines that operate on this context will do so with respect to the associated \ac{PE} team. That is, all point-to-point routines operating on this context will use team-relative \ac{PE} numbering. - } By default, contexts are {\em shareable} and, when it is allowed by the threading model provided by the \openshmem library, they can be used concurrently by diff --git a/content/shmem_ctx_destroy.tex b/content/shmem_ctx_destroy.tex index 553e5e3aa..2e801685a 100644 --- a/content/shmem_ctx_destroy.tex +++ b/content/shmem_ctx_destroy.tex @@ -14,7 +14,7 @@ \apidescription{ \FUNC{shmem\_ctx\_destroy} destroys a context that was created by a call to - \FUNC{shmem\_ctx\_create} \newtext{or \FUNC{shmem\_team\_create\_ctx}}. + \FUNC{shmem\_ctx\_create} or \FUNC{shmem\_team\_create\_ctx}. It is the user's responsibility to ensure that the context is not used after it has been destroyed, for example when the destroyed context is used by multiple threads. This function @@ -28,10 +28,6 @@ } \apinotes{ - \oldtext{ - It is invalid to pass \CONST{SHMEM\_CTX\_DEFAULT} to this routine. - } - Destroying a context makes it impossible for the user to complete communication operations that are pending on that context. This includes nonblocking communication operations, whose local buffers are only returned diff --git a/content/shmem_finalize.tex b/content/shmem_finalize.tex index 4d92adb1b..cfa32d137 100644 --- a/content/shmem_finalize.tex +++ b/content/shmem_finalize.tex @@ -22,11 +22,9 @@ implicit global barrier in \FUNC{shmem\_finalize} to ensure that pending communications are completed and that no resources are released until all \acp{PE} have entered \FUNC{shmem\_finalize}. - \oldtext{This routine destroys all shareable contexts.} - \newtext{ This routine destroys all teams created by the \openshmem program. As a result, all shareable contexts are destroyed. 
- } The user is + The user is responsible for destroying all contexts with the \CONST{SHMEM\_CTX\_PRIVATE} option enabled prior to calling this routine; otherwise, the behavior is undefined. diff --git a/content/shmem_malloc.tex b/content/shmem_malloc.tex index 26ba59ec7..44b4854de 100644 --- a/content/shmem_malloc.tex +++ b/content/shmem_malloc.tex @@ -23,7 +23,7 @@ \apidescription{ The \FUNC{shmem\_malloc}, \FUNC{shmem\_free}, \FUNC{shmem\_realloc}, and \FUNC{shmem\_align} routines are collective operations that require - participation by all \acp{PE} \newtext{in the default team}. + participation by all \acp{PE} in the default team. The \FUNC{shmem\_malloc} routine returns a pointer to a block of at least \VAR{size} bytes, which shall be suitably aligned so that it may be diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 308758fc4..348257f73 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -8,7 +8,6 @@ \begin{table}[h] \begin{center} -{\color{Green} \begin{tabular}{|l|l|l|l|l|} \hline \TYPE & \TYPENAME & \multicolumn{3}{c|}{Operations Supporting \TYPE}\\ \hline @@ -29,7 +28,6 @@ \end{tabular} \TableCaptionRef{Reduction Types, Names and Supporting Operations} \label{reducetypes} -} \end{center} \end{table} @@ -38,193 +36,170 @@ Performs a bitwise AND reduction across a set of \acp{PE}.\newline %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_and\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the AND operation as specified by Table \ref{reducetypes}. 
-} %% C/C++ \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_and\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} -} + \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_and\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); \end{CsynopsisCol} \end{DeprecateBlock} -\newtext{where \TYPE{} is one of the integer types supported for the AND operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} +where \TYPE{} is one of the integer types supported for the AND operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}. \paragraph{OR} Performs a bitwise OR reduction across a set of \acp{PE}.\newline %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_or\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the OR operation as specified by Table \ref{reducetypes}. -} %% C/C++ \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_or\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} -} + \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_or\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); \end{CsynopsisCol} \end{DeprecateBlock} -\newtext{where \TYPE{} is one of the integer types supported for the OR operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} +where \TYPE{} is one of the integer types supported for the OR operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}. 
\paragraph{XOR} Performs a bitwise exclusive OR (XOR) reduction across a set of \acp{PE}.\newline %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer types supported for the XOR operation as specified by Table \ref{reducetypes}. -} %% C/C++ \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_xor\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} -} + \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_xor\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); \end{CsynopsisCol} \end{DeprecateBlock} -\newtext{where \TYPE{} is one of the integer types supported for the XOR operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} +where \TYPE{} is one of the integer types supported for the XOR operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}. \paragraph{MAX} Performs a maximum-value reduction across a set of \acp{PE}.\newline %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_max\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer or real types supported for the MAX operation as specified by Table \ref{reducetypes}. 
-} %% C/C++ \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_max\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} -} + \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_max\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); \end{CsynopsisCol} \end{DeprecateBlock} -\newtext{where \TYPE{} is one of the integer or real types supported for the MAX operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} +where \TYPE{} is one of the integer or real types supported for the MAX operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}. \paragraph{MIN} Performs a minimum-value reduction across a set of \acp{PE}.\newline %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_min\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer or real types supported for the MIN operation as specified by Table \ref{reducetypes}. 
-} %% C/C++ \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_min\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} -} + \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_min\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); \end{CsynopsisCol} \end{DeprecateBlock} -\newtext{where \TYPE{} is one of the integer or real types supported for the MIN operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} +where \TYPE{} is one of the integer or real types supported for the MIN operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}. \paragraph{SUM} Performs a sum reduction across a set of \acp{PE}.\newline %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_sum\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation as specified by Table \ref{reducetypes}. 
-} %% C/C++ \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} -} + \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); \end{CsynopsisCol} \end{DeprecateBlock} -\newtext{where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} +where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}. \paragraph{PROD} Performs a product reduction across a set of \acp{PE}.\newline %% C11 -{\color{Green} \begin{C11synopsis} int @\FuncDecl{shmem\_prod\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the PROD operation as specified by Table \ref{reducetypes}. 
-} %% C/C++ \begin{Csynopsis} \end{Csynopsis} -{\color{Green} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_prod\_reduce}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); \end{CsynopsisCol} -} + \begin{DeprecateBlock} \begin{CsynopsisCol} void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_prod\_to\_all}@(TYPE *dest, const TYPE *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); \end{CsynopsisCol} \end{DeprecateBlock} -\newtext{where \TYPE{} is one of the integer, real, or complex types supported for the PROD operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}.} +where \TYPE{} is one of the integer, real, or complex types supported for the PROD operation and has a corresponding \TYPENAME{} as specified by Table \ref{reducetypes}. \begin{apiarguments} -\newtext{% \apiargument{IN}{team}{The team over which to perform the operation.}% -} \apiargument{OUT}{dest}{A symmetric array, of length \VAR{nreduce} elements, to receive the result of the reduction routines. The data type of \dest{} varies @@ -234,8 +209,8 @@ contains one element for each separate reduction routine. The \source{} argument must have the same data type as \dest.} \apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. \newtext{In teams based API calls, \VAR{nreduce} must be of type size\_t. - In deprecated active-set based API calls,} + arrays. In teams based API calls, \VAR{nreduce} must be of type size\_t. 
+ In deprecated active-set based API calls, \VAR{nreduce} must be of type integer.} \begin{DeprecateBlock} @@ -260,36 +235,31 @@ \end{apiarguments} \apidescription{ - \openshmem reduction routines \newtext{are collective routines over an active set or - existing \openshmem team that} compute one or more reductions across symmetric + \openshmem reduction routines are collective routines over an active set or + existing \openshmem team that compute one or more reductions across symmetric arrays on multiple \acp{PE}. A reduction performs an associative binary routine across a set of values. The \VAR{nreduce} argument determines the number of separate reductions to - perform. The \source{} array on all \acp{PE} \newtext{participating in the reduction} - \oldtext{in the active set} %% + perform. The \source{} array on all \acp{PE} participating in the reduction provides one element for each reduction. The results of the reductions are placed in the - \dest{} array on all \acp{PE} \newtext{participating in the reduction.} - \oldtext{in the active set.} %% + \dest{} array on all \acp{PE} participating in the reduction. The \source{} and \dest{} arrays may be the same array, but they may not be overlapping arrays. The same \dest{} and \source{} arrays - must be passed to all \acp{PE} \newtext{participating in the reduction.} - \oldtext{in the active set.} %% + must be passed to all \acp{PE} participating in the reduction. -{\color{Green} Team-based reduction routines operate over all \acp{PE} in the provided team argument. All \acp{PE} in the provided team must participate in the reduction. If an invalid team handle or \LibConstRef{SHMEM\_TEAM\_INVALID} is passed to this routine, the behavior is undefined. Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. 
-} - As with all \oldtext{\openshmem} \newtext{active set-based} collective routines, + As with all active set-based collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in - the active set calls an \oldtext{\openshmem} \newtext{active set-based} collective routine, + the active set calls an active set-based collective routine, the behavior is undefined. The values of arguments \VAR{nreduce}, \VAR{PE\_start}, \VAR{logPE\_stride}, @@ -299,10 +269,9 @@ Before any \ac{PE} calls a reduction routine, the following conditions must be ensured: \begin{itemize} - \item The \dest{} array on all \acp{PE} \newtext{participating in the reduction} - \oldtext{in the active set} %% + \item The \dest{} array on all \acp{PE} participating in the reduction is ready to accept the results of the \OPR{reduction}. - \item \newtext{If using active-set-based routines,} the + \item If using active-set-based routines, the \VAR{pWrk} and \VAR{pSync} arrays on all \acp{PE} in the active set are not still in use from a prior call to a collective \openshmem routine. @@ -313,7 +282,7 @@ \ac{PE}: \begin{itemize} \item The \dest{} array is updated and the \source{} array may be safely reused. - \item \newtext{If using active-set-based routines,} + \item If using active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} @@ -326,7 +295,7 @@ } \apireturnvalues{ - \newtext{Zero on successful local completion. Nonzero otherwise.} + Zero on successful local completion. Nonzero otherwise. 
} \apinotes{ diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 0c8c7ae58..ceacc2115 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,17 +1,12 @@ \apisummary{ - \newtext{Registers the arrival of a \ac{PE} at a synchronization point and suspends + Registers the arrival of a \ac{PE} at a synchronization point and suspends execution until all other \acp{PE} in a given \openshmem team or active set arrive at a synchronization point. For multithreaded programs, execution is suspended - as specified by the threading model (Section \ref{subsec:thread_support}).} - \oldtext{% - Performs all operations described in the \FUNC{shmem\_sync\_all} interface - but with respect to a subset of \acp{PE} defined by the active set. - } + as specified by the threading model (Section \ref{subsec:thread_support}). } \begin{apidefinition} -{\color{ForestGreen} \begin{C11synopsis} int @\FuncDecl{shmem\_sync}@(shmem_team_t team); \end{C11synopsis} @@ -19,7 +14,6 @@ \begin{Csynopsis} int @\FuncDecl{shmem\_team\_sync}@(shmem_team_t team); \end{Csynopsis} -} \begin{DeprecateBlock} \begin{CsynopsisCol} @@ -29,9 +23,7 @@ \begin{apiarguments} -\newtext{% \apiargument{IN}{team}{The team over which to perform the operation.}% -} \begin{DeprecateBlock} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of @@ -51,14 +43,8 @@ \apidescription{ \FUNC{shmem\_sync} is a collective synchronization routine over an - \newtext{existing \openshmem team or} active set. - \oldtext{% - Control returns from \FUNC{shmem\_sync} after all \acp{PE} in - the active set (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and - \VAR{PE\_size}) have called \FUNC{shmem\_sync}. - } - -{\color{Green} + existing \openshmem team or active set. + The routine registers the arrival of a \ac{PE} at a synchronization point in the program. This is a fast mechanism for synchronizing all \acp{PE} that participate in this collective call. 
The routine blocks the calling \ac{PE} until all \acp{PE} in the @@ -72,12 +58,11 @@ Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. -} - As with all \oldtext{\openshmem} \newtext{active set-based} collective routines, + As with all active set-based collective routines, each of these routines assumes that only \acp{PE} in the active set call the routine. If a \ac{PE} not in - the active set calls an \oldtext{\openshmem} \newtext{active set-based} collective routine, + the active set calls an active set-based collective routine, the behavior is undefined. The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and @@ -93,8 +78,7 @@ } \apireturnvalues{ - \newtext{Zero on successful local completion. Nonzero otherwise.} - \oldtext{None.} + Zero on successful local completion. Nonzero otherwise. } \apinotes{ diff --git a/content/shmem_sync_all.tex b/content/shmem_sync_all.tex index 96937a9f7..5c70a966e 100644 --- a/content/shmem_sync_all.tex +++ b/content/shmem_sync_all.tex @@ -1,7 +1,7 @@ \apisummary{ - Registers the arrival of a \ac{PE} at a \newtext{synchronization point} \oldtext{barrier} and suspends - execution until all other \acp{PE} \newtext{in the default team} arrive at \newtext{a synchronization point} \oldtext{the barrier}. \newtext{For multithreaded programs, execution is suspended - as specified by the threading model (Section \ref{subsec:thread_support})}. + Registers the arrival of a \ac{PE} at a synchronization point and suspends + execution until all other \acp{PE} in the default team arrive at a synchronization point. For multithreaded programs, execution is suspended + as specified by the threading model (Section \ref{subsec:thread_support}). } \begin{apidefinition} @@ -18,16 +18,9 @@ \apidescription{ - \newtext{% This routine blocks the calling \ac{PE} until all \acp{PE} in the default team have called \FUNC{shmem\_sync\_all}. 
- } - \oldtext{% - The \FUNC{shmem\_sync\_all} routine registers the arrival of a \ac{PE} at a - barrier. Barriers are a fast mechanism for synchronizing all \acp{PE} at - once. This routine blocks the \ac{PE} until all \acp{PE} have called - \FUNC{shmem\_sync\_all}. - } + In a multithreaded \openshmem program, only the calling thread is blocked. @@ -42,17 +35,8 @@ } \apinotes{ - \newtext{% The \FUNC{shmem\_sync\_all} routine is equivalent to calling \FUNC{shmem\_team\_sync} on the default team. - } - \oldtext{% - The \FUNC{shmem\_sync\_all} routine can be used to portably ensure that - memory access operations observe remote updates in the order enforced by the - initiator \acp{PE}, provided that the initiator PE ensures completion of remote - updates with a call to \FUNC{shmem\_quiet} prior to the call to the - \FUNC{shmem\_sync\_all} routine. - } } \end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index b9044e367..54d007696 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -105,7 +105,6 @@ \subsubsection{\textbf{SHMEM\_CALLOC}}\label{subsec:shmem_calloc} -\color{Green} \subsection{Team Management Routines}\label{subsec:team} \input{content/teams_intro.tex} @@ -133,7 +132,6 @@ \subsubsection{\textbf{SHMEM\_TEAM\_SPLIT\_2D}}\label{subsec:shmem_team_split_2d \subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} \input{content/shmem_team_destroy.tex} -\color{Black} @@ -170,7 +168,6 @@ \subsection{Communication Management Routines} When managed in this way, applications can use an equality comparison to test whether a given context handle references a valid context. -\newtext{ Every communication context is associated with a team. This association is established at context creation. Communication contexts created by \FUNC{shmem\_ctx\_create} are @@ -185,27 +182,22 @@ \subsection{Communication Management Routines} respect to the team-relative \ac{PE} numbering of the associated team. 
If the PE number passed to such a routine is invalid, being negative or greater than or equal to the size of the \openshmem team, then the behavior is undefined. -} \subsubsection{\textbf{SHMEM\_CTX\_CREATE}} \label{subsec:shmem_ctx_create} \input{content/shmem_ctx_create.tex} -\newtext{ \subsubsection{\textbf{SHMEM\_TEAM\_CREATE\_CTX}} \label{subsec:shmem_team_create_ctx} \input{content/shmem_team_create_ctx.tex} -} \subsubsection{\textbf{SHMEM\_CTX\_DESTROY}} \label{subsec:shmem_ctx_destroy} \input{content/shmem_ctx_destroy.tex} -\newtext{ \subsubsection{\textbf{SHMEM\_CTX\_GET\_TEAM}} \label{subsec:shmem_ctx_get_team} \input{content/shmem_ctx_get_team.tex} -} \subsection{Remote Memory Access Routines}\label{sec:rma} From 58889711e3424955bf8ded7916d619d2ff5ef434 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Thu, 3 Oct 2019 17:50:16 -0400 Subject: [PATCH 285/319] Remove change markers --- content/library_constants.tex | 4 ---- content/shmem_put_signal.tex | 2 -- content/shmem_put_signal_nbi.tex | 2 -- content/shmem_signal_fetch.tex | 2 -- content/shmem_signal_wait_until.tex | 2 -- main_spec.tex | 2 -- 6 files changed, 14 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index f14c6fca0..75106e0fd 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -67,17 +67,13 @@ See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. \tabularnewline \hline %% -\color{ForestGreen} \LibConstDecl{SHMEM\_SIGNAL\_SET} & -\color{ForestGreen} An integer constant expression corresponding to the signal update set operation. See Section~\ref{subsec:shmem_put_signal} and Section~\ref{subsec:shmem_put_signal_nbi} for more detail about its use. \tabularnewline \hline %% -\color{ForestGreen} \LibConstDecl{SHMEM\_SIGNAL\_ADD} & -\color{ForestGreen} An integer constant expression corresponding to the signal update add operation. 
See Section~\ref{subsec:shmem_put_signal} and Section~\ref{subsec:shmem_put_signal_nbi} for more detail about its use. diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 84ff894d7..79507e30e 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -1,4 +1,3 @@ -\color{ForestGreen} \apisummary{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} @@ -102,4 +101,3 @@ \end{apiexamples} \end{apidefinition} -\color{black} diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index def87af14..297617cb6 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -1,4 +1,3 @@ -\color{ForestGreen} \apisummary{ The nonblocking put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} @@ -90,4 +89,3 @@ } \end{apidefinition} -\color{black} diff --git a/content/shmem_signal_fetch.tex b/content/shmem_signal_fetch.tex index c838742a7..35094245e 100644 --- a/content/shmem_signal_fetch.tex +++ b/content/shmem_signal_fetch.tex @@ -1,4 +1,3 @@ -\color{ForestGreen} \apisummary{ Fetches the signal update on a local data object. } @@ -30,4 +29,3 @@ } \end{apidefinition} -\color{Black} diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex index dbd6225b0..35a1a14e9 100644 --- a/content/shmem_signal_wait_until.tex +++ b/content/shmem_signal_wait_until.tex @@ -1,4 +1,3 @@ -\color{ForestGreen} \apisummary{ Wait for a variable on the local \ac{PE} to change from a signaling operation. 
@@ -54,4 +53,3 @@ } \end{apidefinition} -\color{Black} diff --git a/main_spec.tex b/main_spec.tex index 7edc00543..582afd1b0 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -241,7 +241,6 @@ \subsubsection{\textbf{SHMEM\_ATOMIC\_XOR}} -\color{ForestGreen} \subsection{Signaling Operations}\label{sec:shmem_signal} This section specifies the OpenSHMEM support for \emph{put-with-signal}, non-blocking \emph{put-with-signal}, and \emph{signal-fetch} routines. The @@ -285,7 +284,6 @@ \subsubsection{Available Signal Operators} signal into the signal data object on a remote \VAR{PE} as an atomic operation.} -\color{Black} \subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} \input{content/shmem_put_signal.tex} From d570c69dde4c71b85bcb7a8110632bd90bae3b4b Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 7 Oct 2019 13:53:45 -0400 Subject: [PATCH 286/319] Remove change markers --- content/shmem_atomic_compare_swap_nbi.tex | 2 -- content/shmem_atomic_fetch_add_nbi.tex | 2 -- content/shmem_atomic_fetch_and_nbi.tex | 2 -- content/shmem_atomic_fetch_inc_nbi.tex | 2 -- content/shmem_atomic_fetch_nbi.tex | 2 -- content/shmem_atomic_fetch_or_nbi.tex | 2 -- content/shmem_atomic_fetch_xor_nbi.tex | 2 -- content/shmem_atomic_swap_nbi.tex | 2 -- 8 files changed, 16 deletions(-) diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex index b988d73b2..f8139294d 100644 --- a/content/shmem_atomic_compare_swap_nbi.tex +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ The nonblocking atomic routine provides a method for performing an atomic conditional swap on a remote data object. 
@@ -58,4 +57,3 @@ } \end{apidefinition} -\color{Black} diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index 727b7120f..6426d7958 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ The nonblocking atomic routine performs an atomic fetch-and-add operation on a remote data object. @@ -55,4 +54,3 @@ } \end{apidefinition} -\color{Black} diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index 1fb4b871a..205e463d6 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ This nonblocking atomic operation performs an atomic fetching bitwise AND operation on a remote data object. @@ -54,4 +53,3 @@ } \end{apidefinition} -\color{Black} diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index b116d1140..990afc348 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ This nonblocking atomic routine performs an atomic fetch-and-increment operation on a remote data object. @@ -52,4 +51,3 @@ } \end{apidefinition} -\color{Black} diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex index a66b564a4..bd96ff9e7 100644 --- a/content/shmem_atomic_fetch_nbi.tex +++ b/content/shmem_atomic_fetch_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ The nonblocking atomic fetch routine provides a method for atomically fetching the value of a remote data object. 
@@ -51,4 +50,3 @@ } \end{apidefinition} -\color{Black} diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index 5b8251a52..a30735bd9 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ This nonblocking atomic operation performs an atomic fetching bitwise OR operation on a remote data object. @@ -54,4 +53,3 @@ } \end{apidefinition} -\color{Black} diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index cd7f1eb73..65651b41b 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ This nonblocking atomic operation performs an atomic fetching bitwise XOR operation on a remote data object. @@ -54,4 +53,3 @@ } \end{apidefinition} -\color{Black} diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex index 61f62d2be..b9ab1afb2 100644 --- a/content/shmem_atomic_swap_nbi.tex +++ b/content/shmem_atomic_swap_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ This nonblocking atomic operation performs an atomic swap to a remote data object. 
@@ -51,4 +50,3 @@ } \end{apidefinition} -\color{Black} From cd17bce4fae27f828d558d2c3994b070ce7a17a1 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 7 Oct 2019 13:53:59 -0400 Subject: [PATCH 287/319] Fix duplicate label --- main_spec.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main_spec.tex b/main_spec.tex index 91aa42192..d47d50603 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -237,7 +237,7 @@ \subsubsection{\textbf{SHMEM\_ATOMIC\_XOR}} \label{subsec:shmem_atomic_xor} \input{content/shmem_atomic_xor.tex} -\subsection{Non-blocking Atomic Memory Operations}\label{sec:amo} +\subsection{Non-blocking Atomic Memory Operations}\label{sec:amo-nbi} \subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_NBI}} \label{subsec:shmem_atomic_fetch_nbi} From 7e07700aa853618746bd9b11b8216964451007bb Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 7 Oct 2019 15:25:05 -0400 Subject: [PATCH 288/319] Use \OPR macro to format put-with-signal refs --- content/p2p_sync_intro.tex | 2 +- content/shmem_fence.tex | 24 ++++++++++++------------ content/shmem_put_signal.tex | 10 +++++----- content/shmem_put_signal_nbi.tex | 10 +++++----- content/shmem_quiet.tex | 20 ++++++++++---------- main_spec.tex | 8 ++++---- 6 files changed, 37 insertions(+), 37 deletions(-) diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 214ffeded..f2132e0bc 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -3,7 +3,7 @@ object. The point-to-point synchronization routines can be used to portably ensure that memory access operations observe remote updates in the order enforced by -the initiator \ac{PE} using the put-with-signal, \FUNC{shmem\_fence} and +the initiator \ac{PE} using the \OPR{put-with-signal}, \FUNC{shmem\_fence} and \FUNC{shmem\_quiet} routines. 
Where appropriate compiler support is available, \openshmem provides diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index 7b2a66652..7762d8c3f 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -1,7 +1,7 @@ \apisummary{ Assures ordering of delivery of memory store, blocking \PUT{}, - \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking - \PUT{}, \emph{put-with-signal}, and \ac{AMO} + \ac{AMO}, and \OPR{put-with-signal}, as well as nonblocking + \PUT{}, \OPR{put-with-signal}, and \ac{AMO} routines to symmetric data objects. } @@ -20,16 +20,16 @@ \apidescription{ This routine assures ordering of delivery of memory store, blocking \PUT{}, - \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking \PUT{}, - \emph{put-with-signal}, and \ac{AMO} + \ac{AMO}, and \OPR{put-with-signal}, as well as nonblocking \PUT{}, + \OPR{put-with-signal}, and \ac{AMO} routines to symmetric data objects. All memory store, blocking \PUT{}, - \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking \PUT{}, - \emph{put-with-signal}, and \ac{AMO} + \ac{AMO}, and \OPR{put-with-signal}, as well as nonblocking \PUT{}, + \OPR{put-with-signal}, and \ac{AMO} routines to symmetric data objects issued to a particular remote \ac{PE} on the given context prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any - subsequent memory store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, - as well as nonblocking \PUT{}, \emph{put-with-signal}, and \ac{AMO} + subsequent memory store, blocking \PUT{}, \ac{AMO}, and \OPR{put-with-signal}, + as well as nonblocking \PUT{}, \OPR{put-with-signal}, and \ac{AMO} routines to symmetric data objects to the same \ac{PE}. \FUNC{shmem\_fence} guarantees order of delivery, not completion. 
It does not guarantee order of delivery of nonblocking @@ -52,18 +52,18 @@ There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} guarantees completion of memory store, blocking \PUT{}, \ac{AMO}, and - \emph{put-with-signal}, as well as nonblocking \PUT{}, \emph{put-with-signal}, and \ac{AMO} routines to + \OPR{put-with-signal}, as well as nonblocking \PUT{}, \OPR{put-with-signal}, and \ac{AMO} routines to symmetric data objects which makes the updates visible to all other \acp{PE}. The \FUNC{shmem\_quiet} routine should be called if completion of memory - store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as - nonblocking \PUT{}, \emph{put-with-signal}, and \ac{AMO} routines to symmetric data objects is desired + store, blocking \PUT{}, \ac{AMO}, and \OPR{put-with-signal}, as well as + nonblocking \PUT{}, \OPR{put-with-signal}, and \ac{AMO} routines to symmetric data objects is desired when multiple remote \acp{PE} are involved. In an \openshmem program with multithreaded \acp{PE}, it is the user's responsibility to ensure ordering between operations issued by the threads - in a \ac{PE} that target symmetric memory (e.g. \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory stores, + in a \ac{PE} that target symmetric memory (e.g. \PUT{}, \ac{AMO}, \OPR{put-with-signal}, memory stores, and nonblocking routines) and calls by threads in that \ac{PE} to \FUNC{shmem\_fence}. The \FUNC{shmem\_fence} routine can enforce memory store ordering only for the calling thread. 
Thus, to ensure ordering for memory stores performed by a thread that is diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 79507e30e..0cc3cb512 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -1,5 +1,5 @@ \apisummary{ - The put-with-signal routines provide a method for copying data from a + The \OPR{put-with-signal} routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} and subsequently updating a remote flag to signal completion. } @@ -48,7 +48,7 @@ \end{apiarguments} \apidescription{ - The put-with-signal routines provide a method for copying data from a + The \OPR{put-with-signal} routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} and subsequently updating a remote flag to signal completion. The routines return after the data has been copied out of the \source{} array on the @@ -61,7 +61,7 @@ corresponding \dest{} data words into the data object on the remote \ac{PE}. An update to the \VAR{sig\_addr} signal data object through a - put-with-signal routine completes as if performed atomically as described in + \OPR{put-with-signal} routine completes as if performed atomically as described in Section~\ref{subsec:signal_atomicity}. The various options as described in Section~\ref{subsec:signal_operator} can be used as the \VAR{sig\_op} signal operator. @@ -83,9 +83,9 @@ \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied ordering between the signal update of a - put-with-signal routine and another data transfer. For example, the + \OPR{put-with-signal} routine and another data transfer. 
For example, the completion of the signal update in a sequence consisting of a put routine - followed by a put-with-signal routine does not imply delivery of the put + followed by a \OPR{put-with-signal} routine does not imply delivery of the put routine's data. } diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 297617cb6..9a41e6ebc 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -1,5 +1,5 @@ \apisummary{ - The nonblocking put-with-signal routines provide a method for copying data + The nonblocking \OPR{put-with-signal} routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} and subsequently updating a remote flag to signal completion. } @@ -48,7 +48,7 @@ \end{apiarguments} \apidescription{ - The nonblocking put-with-signal routines provide a method for copying data + The nonblocking \OPR{put-with-signal} routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} and subsequently updating a remote flag to signal completion. @@ -60,8 +60,8 @@ The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the - remote \ac{PE}. Furthermore, two successive non-blocking put-with-signal - routines, or a non-blocking put-with-signal routine with another data + remote \ac{PE}. Furthermore, two successive non-blocking \OPR{put-with-signal} + routines, or a non-blocking \OPR{put-with-signal} routine with another data transfer may deliver data out of order unless a call to \FUNC{shmem\_fence} is introduced between the two calls. @@ -69,7 +69,7 @@ performed on the remote \VAR{sig\_addr} signal data object. 
An update to the \VAR{sig\_addr} signal data object through a non-blocking - put-with-signal routine completes as if performed atomically as described in + \OPR{put-with-signal} routine completes as if performed atomically as described in Section~\ref{subsec:signal_atomicity}. The various options as described in Section~\ref{subsec:signal_operator} can be used as the \VAR{sig\_op} signal operator. diff --git a/content/shmem_quiet.tex b/content/shmem_quiet.tex index aa7e28a66..e0fb5e8c2 100644 --- a/content/shmem_quiet.tex +++ b/content/shmem_quiet.tex @@ -1,7 +1,7 @@ \apisummary{ Waits for completion of all outstanding memory store, blocking - \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as - nonblocking \PUT{}, \emph{put-with-signal}, \GET{}, and \ac{AMO} routines + \PUT{}, \ac{AMO}, and \OPR{put-with-signal}, as well as + nonblocking \PUT{}, \OPR{put-with-signal}, \GET{}, and \ac{AMO} routines to symmetric data objects issued by a \ac{PE}. } @@ -21,10 +21,10 @@ \apidescription{ The \FUNC{shmem\_quiet} routine ensures completion of memory store, blocking \PUT{}, \ac{AMO}, and - \emph{put-with-signal}, as well as nonblocking \PUT{},\emph{put-with-signal}, \GET{}, and \ac{AMO} routines on + \OPR{put-with-signal}, as well as nonblocking \PUT{}, \OPR{put-with-signal}, \GET{}, and \ac{AMO} routines on symmetric data objects issued by the calling \ac{PE} on the given context. - All memory store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as - well as nonblocking \PUT{}, \emph{put-with-signal}, \GET{}, and \ac{AMO} routines to + All memory store, blocking \PUT{}, \ac{AMO}, and \OPR{put-with-signal}, as + well as nonblocking \PUT{}, \OPR{put-with-signal}, \GET{}, and \ac{AMO} routines to symmetric data objects are guaranteed to be completed and visible to all \acp{PE} when \FUNC{shmem\_quiet} returns. 
If \VAR{ctx} has the value \CONST{SHMEM\_CTX\_INVALID}, no operation is @@ -38,22 +38,22 @@ \apinotes{ \FUNC{shmem\_quiet} is most useful as a way of ensuring completion of - several memory store, blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, + several memory store, blocking \PUT{}, \ac{AMO}, and \OPR{put-with-signal}, as well as nonblocking \PUT{}, - \emph{put-with-signal}, \GET{}, and \ac{AMO} routines to symmetric data objects initiated by the calling + \OPR{put-with-signal}, \GET{}, and \ac{AMO} routines to symmetric data objects initiated by the calling \ac{PE}. For example, one might use \FUNC{shmem\_quiet} to await delivery of a block of data before issuing another \PUT{} or nonblocking \PUT{} routine, which sets a completion flag on another \ac{PE}. \FUNC{shmem\_quiet} is not usually needed if \FUNC{shmem\_barrier\_all} or \FUNC{shmem\_barrier} are called. The barrier routines wait for the completion of outstanding writes (memory store, - blocking \PUT{}, \ac{AMO}, and \emph{put-with-signal}, as well as nonblocking \PUT{}, - \emph{put-with-signal}, \GET{}, and \ac{AMO} routines) to + blocking \PUT{}, \ac{AMO}, and \OPR{put-with-signal}, as well as nonblocking \PUT{}, + \OPR{put-with-signal}, \GET{}, and \ac{AMO} routines) to symmetric data objects on all \acp{PE}. In an \openshmem program with multithreaded \acp{PE}, it is the user's responsibility to ensure ordering between operations issued by the threads - in a \ac{PE} that target symmetric memory (e.g. \PUT{}, \ac{AMO}, \emph{put-with-signal}, memory stores, + in a \ac{PE} that target symmetric memory (e.g. \PUT{}, \ac{AMO}, \OPR{put-with-signal}, memory stores, and nonblocking routines) and calls by threads in that \ac{PE} to \FUNC{shmem\_quiet}. The \FUNC{shmem\_quiet} routine can enforce memory store ordering only for the calling thread. 
Thus, to ensure ordering for memory stores performed by a thread that is diff --git a/main_spec.tex b/main_spec.tex index 05edf081c..64ae67139 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -275,8 +275,8 @@ \subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_XOR\_NBI}} \subsection{Signaling Operations}\label{sec:shmem_signal} -This section specifies the OpenSHMEM support for \emph{put-with-signal}, -non-blocking \emph{put-with-signal}, and \emph{signal-fetch} routines. The +This section specifies the OpenSHMEM support for \OPR{put-with-signal}, +non-blocking \OPR{put-with-signal}, and \OPR{signal-fetch} routines. The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} and subsequently updating a remote flag to signal completion. The signal-fetch routine provides @@ -416,8 +416,8 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_WAIT\_UNTIL}}\label{subsec:shmem_signal_wa \subsection{Memory Ordering Routines}\label{subsec:memory_order} The following section discusses \openshmem \acp{API} that provide mechanisms to ensure ordering and/or delivery of memory store, blocking \OPR{Put}, \ac{AMO}, -and \emph{put-with-signal}, as well as non-blocking \PUT{}, -\emph{put-with-signal}, \GET{}, and \ac{AMO} routines to symmetric data +and \OPR{put-with-signal}, as well as non-blocking \PUT{}, +\OPR{put-with-signal}, \GET{}, and \ac{AMO} routines to symmetric data objects. 
\subsubsection{\textbf{SHMEM\_FENCE}}\label{subsec:shmem_fence} From deee8172a705de2bbbef970656943e058161e213 Mon Sep 17 00:00:00 2001 From: James Dinan Date: Mon, 7 Oct 2019 15:28:55 -0400 Subject: [PATCH 289/319] Update: non-blocking -> nonblocking --- content/shmem_put_signal_nbi.tex | 6 +++--- main_spec.tex | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 9a41e6ebc..0717aee35 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -60,15 +60,15 @@ The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the - remote \ac{PE}. Furthermore, two successive non-blocking \OPR{put-with-signal} - routines, or a non-blocking \OPR{put-with-signal} routine with another data + remote \ac{PE}. Furthermore, two successive nonblocking \OPR{put-with-signal} + routines, or a nonblocking \OPR{put-with-signal} routine with another data transfer may deliver data out of order unless a call to \FUNC{shmem\_fence} is introduced between the two calls. The \VAR{sig\_op} signal operator determines the type of update to be performed on the remote \VAR{sig\_addr} signal data object. - An update to the \VAR{sig\_addr} signal data object through a non-blocking + An update to the \VAR{sig\_addr} signal data object through a nonblocking \OPR{put-with-signal} routine completes as if performed atomically as described in Section~\ref{subsec:signal_atomicity}. 
The various options as described in Section~\ref{subsec:signal_operator} can be used as the \VAR{sig\_op} signal diff --git a/main_spec.tex b/main_spec.tex index 64ae67139..f25d48e8d 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -169,7 +169,7 @@ \subsubsection{\textbf{SHMEM\_G}}\label{subsec:shmem_g} \subsubsection{\textbf{SHMEM\_IGET}}\label{subsec:shmem_iget} \input{content/shmem_iget.tex} -\subsection{Non-blocking Remote Memory Access Routines}\label{sec:rma_nbi} +\subsection{Nonblocking Remote Memory Access Routines}\label{sec:rma_nbi} \subsubsection{\textbf{SHMEM\_PUT\_NBI}}\label{subsec:shmem_put_nbi} \input{content/shmem_put_nbi.tex} @@ -238,7 +238,7 @@ \subsubsection{\textbf{SHMEM\_ATOMIC\_XOR}} \label{subsec:shmem_atomic_xor} \input{content/shmem_atomic_xor.tex} -\subsection{Non-blocking Atomic Memory Operations}\label{sec:amo-nbi} +\subsection{Nonblocking Atomic Memory Operations}\label{sec:amo-nbi} \subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_NBI}} \label{subsec:shmem_atomic_fetch_nbi} @@ -276,7 +276,7 @@ \subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_XOR\_NBI}} \subsection{Signaling Operations}\label{sec:shmem_signal} This section specifies the OpenSHMEM support for \OPR{put-with-signal}, -non-blocking \OPR{put-with-signal}, and \OPR{signal-fetch} routines. The +nonblocking \OPR{put-with-signal}, and \OPR{signal-fetch} routines. The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} and subsequently updating a remote flag to signal completion. 
The signal-fetch routine provides @@ -284,7 +284,7 @@ \subsection{Signaling Operations}\label{sec:shmem_signal} \subsubsection{Atomicity Guarantees for Signaling Operations} \label{subsec:signal_atomicity} -All signaling operations put-with-signal, non-blocking put-with-signal, and +All signaling operations put-with-signal, nonblocking put-with-signal, and signal-fetch are performed on a signal data object, a remotely accessible symmetric object of type \VAR{uint64\_t}. A signal operator in the put-with-signal routine is a \openshmem library constant that determines the @@ -293,7 +293,7 @@ \subsubsection{Atomicity Guarantees for Signaling Operations} All signaling operations on the signal data object completes as if performed atomically with respect to the following: \begin{itemize} - \item other blocking or non-blocking variant of the put-with-signal routine + \item other blocking or nonblocking variant of the put-with-signal routine that updates the signal data object using the same signal update operator; \item signal-fetch routine that fetches the signal data object; and \item any point-to-point synchronization routine that accesses the signal @@ -416,7 +416,7 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_WAIT\_UNTIL}}\label{subsec:shmem_signal_wa \subsection{Memory Ordering Routines}\label{subsec:memory_order} The following section discusses \openshmem \acp{API} that provide mechanisms to ensure ordering and/or delivery of memory store, blocking \OPR{Put}, \ac{AMO}, -and \OPR{put-with-signal}, as well as non-blocking \PUT{}, +and \OPR{put-with-signal}, as well as nonblocking \PUT{}, \OPR{put-with-signal}, \GET{}, and \ac{AMO} routines to symmetric data objects. 
From 11fd7dab845503cf9301b714cee9f4f4b1c947ce Mon Sep 17 00:00:00 2001 From: James Dinan Date: Wed, 9 Oct 2019 09:57:01 -0400 Subject: [PATCH 290/319] DocEdits from September F2F --- content/shmem_atomic_compare_swap_nbi.tex | 4 ++-- content/shmem_atomic_fetch_add_nbi.tex | 4 ++-- content/shmem_atomic_fetch_and_nbi.tex | 8 ++++---- content/shmem_atomic_fetch_inc_nbi.tex | 6 +++--- content/shmem_atomic_fetch_nbi.tex | 4 ++-- content/shmem_atomic_fetch_or_nbi.tex | 8 ++++---- content/shmem_atomic_fetch_xor_nbi.tex | 6 +++--- content/shmem_atomic_swap_nbi.tex | 6 +++--- content/shmem_get_nbi.tex | 2 +- content/shmem_put_nbi.tex | 2 +- content/shmem_put_signal_nbi.tex | 2 +- 11 files changed, 26 insertions(+), 26 deletions(-) diff --git a/content/shmem_atomic_compare_swap_nbi.tex b/content/shmem_atomic_compare_swap_nbi.tex index f8139294d..467b9e015 100644 --- a/content/shmem_atomic_compare_swap_nbi.tex +++ b/content/shmem_atomic_compare_swap_nbi.tex @@ -43,8 +43,8 @@ This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, prior contents of the \VAR{dest} data - object is fetched into \VAR{fetch} local data object and the - contents of \VAR{value} is conditionally updated into \VAR{dest} on the + object have been fetched into \VAR{fetch} local data object and the + contents of \VAR{value} have been conditionally updated into \VAR{dest} on the remote \ac{PE}. } diff --git a/content/shmem_atomic_fetch_add_nbi.tex b/content/shmem_atomic_fetch_add_nbi.tex index 6426d7958..4efce200b 100644 --- a/content/shmem_atomic_fetch_add_nbi.tex +++ b/content/shmem_atomic_fetch_add_nbi.tex @@ -41,8 +41,8 @@ of the fetch and the update. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. 
At the completion of \FUNC{shmem\_quiet}, \VAR{value} - is added to \VAR{dest} on \VAR{pe} and the prior contents of \VAR{dest} - are fetched into the \VAR{fetch} local data object. + has been added to \VAR{dest} on \VAR{pe} and the prior contents of \VAR{dest} + fetched into the \VAR{fetch} local data object. } \apireturnvalues{ diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index 205e463d6..953e0ce67 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -34,13 +34,13 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines performs an + The nonblocking \FUNC{shmem\_atomic\_fetch\_and\_nbi} routines perform an atomic fetching bitwise AND on the remotely accessible data object pointed - by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine + by \VAR{dest} at \ac{PE} \VAR{pe} with the operand \VAR{value}. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines performs a fetching bitwise AND on - \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior + \FUNC{shmem\_quiet}, these routines have performed a fetching bitwise AND on + \VAR{dest} at \ac{PE} \VAR{pe} with the operand \VAR{value} and fetch the prior contents of \VAR{dest} into the \VAR{fetch} local data object. } diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index 990afc348..5f895528b 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -34,11 +34,11 @@ \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_inc\_nbi} routines performs an + The nonblocking \FUNC{shmem\_atomic\_fetch\_inc\_nbi} routines perform an atomic fetch-and-increment operation. 
This routine returns after initiating the operation. The operation is considered complete after a subsequent call to - \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the \dest{} on - \ac{PE} \VAR{pe} is increased by one and the previous contents of \dest{} are + \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, \dest{} on + \ac{PE} \VAR{pe} has been increased by one and the previous contents of \dest{} are fetched into the \VAR{fetch} local data object. } diff --git a/content/shmem_atomic_fetch_nbi.tex b/content/shmem_atomic_fetch_nbi.tex index bd96ff9e7..a816bd01c 100644 --- a/content/shmem_atomic_fetch_nbi.tex +++ b/content/shmem_atomic_fetch_nbi.tex @@ -33,11 +33,11 @@ \end{apiarguments} \apidescription{ - The nonblocking atomic fetch routines performs a nonblocking fetch of a + The nonblocking atomic fetch routines perform a nonblocking fetch of a value atomically from a remote data object. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, contents of the \source{} data object from \ac{PE} is + \FUNC{shmem\_quiet}, contents of the \source{} data object from \ac{PE} have been fetched into \VAR{fetch} local data object. } diff --git a/content/shmem_atomic_fetch_or_nbi.tex b/content/shmem_atomic_fetch_or_nbi.tex index a30735bd9..5ce26bb6c 100644 --- a/content/shmem_atomic_fetch_or_nbi.tex +++ b/content/shmem_atomic_fetch_or_nbi.tex @@ -34,13 +34,13 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines performs an + The nonblocking \FUNC{shmem\_atomic\_fetch\_or\_nbi} routines perform an atomic fetching bitwise OR on the remotely accessible data object pointed by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine - returns after posting the operation. The operation is considered complete + returns after initiating the operation. 
The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines performs a fetching bitwise OR on - \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior + \FUNC{shmem\_quiet}, these routines have performed a fetching bitwise OR on + \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetched the prior contents of \VAR{dest} into the \VAR{fetch} local data object. } diff --git a/content/shmem_atomic_fetch_xor_nbi.tex b/content/shmem_atomic_fetch_xor_nbi.tex index 65651b41b..afec95a51 100644 --- a/content/shmem_atomic_fetch_xor_nbi.tex +++ b/content/shmem_atomic_fetch_xor_nbi.tex @@ -34,13 +34,13 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines performs an + The nonblocking \FUNC{shmem\_atomic\_fetch\_xor\_nbi} routines perform an atomic fetching bitwise XOR on the remotely accessible data object pointed by \VAR{dest} at PE \VAR{pe} with the operand \VAR{value}. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, these routines performs a fetching bitwise XOR on - \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetch the prior + \FUNC{shmem\_quiet}, these routines have performed a fetching bitwise XOR on + \VAR{dest} at PE \VAR{pe} with the operand \VAR{value} and fetched the prior contents of \VAR{dest} into the \VAR{fetch} local data object. } diff --git a/content/shmem_atomic_swap_nbi.tex b/content/shmem_atomic_swap_nbi.tex index b9ab1afb2..0007f0ef4 100644 --- a/content/shmem_atomic_swap_nbi.tex +++ b/content/shmem_atomic_swap_nbi.tex @@ -33,11 +33,11 @@ \end{apiarguments} \apidescription{ - The nonblocking \FUNC{shmem\_atomic\_swap\_nbi} routines performs an atomic + The nonblocking \FUNC{shmem\_atomic\_swap\_nbi} routines perform an atomic swap operation. 
This routine returns after initiating the operation. The operation is considered complete after a subsequent call to - \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, it writes - \VAR{value} into \dest{} on \ac{PE} and fetches the prior contents of + \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, it has written + \VAR{value} into \dest{} on \ac{PE} and fetched the prior contents of \dest{} into \VAR{fetch} local data object. } diff --git a/content/shmem_get_nbi.tex b/content/shmem_get_nbi.tex index aa7a119c1..37d6e5ec2 100644 --- a/content/shmem_get_nbi.tex +++ b/content/shmem_get_nbi.tex @@ -45,7 +45,7 @@ \apidescription{ The get routines provide a method for copying a contiguous symmetric data object from a different \ac{PE} to a contiguous data object on the local - \ac{PE}. The routines return after posting the operation. The operation is considered + \ac{PE}. The routines return after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the data has been delivered to the \dest{} array on the local \ac{PE}. diff --git a/content/shmem_put_nbi.tex b/content/shmem_put_nbi.tex index c706920f9..a74924992 100644 --- a/content/shmem_put_nbi.tex +++ b/content/shmem_put_nbi.tex @@ -42,7 +42,7 @@ \end{apiarguments} \apidescription{ - The routines return after posting the operation. The operation is considered + The routines return after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the data has been copied into the \dest{} array on the destination \ac{PE}. 
diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 0717aee35..15c185bc7 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -52,7 +52,7 @@ from a contiguous local data object to a data object on a specified \ac{PE} and subsequently updating a remote flag to signal completion. - The routines return after posting the operation. The operation is considered + The routines return after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, the data has been copied out of the \source{} array on the local \ac{PE} and delivered into the \dest{} array on the destination From 892b5d99e3b696be99caa5af1b85ae163affba05 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Wed, 9 Oct 2019 17:58:43 -0500 Subject: [PATCH 291/319] Add changelog for NBI AMO and fix PWS changelog --- content/backmatter.tex | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index b87ae0bcc..cb9e78994 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -520,10 +520,13 @@ \section{Version 1.5} The following list describes the specific changes in \openshmem[1.5]: \begin{itemize} % -\item Added support for blocking put-with-signal functions. +\item Added support for nonblocking \ac{AMO} functions. +\\ See Section \ref{sec:amo-nbi}. +% +\item Added support for blocking \OPR{put-with-signal} functions. \\ See Section \ref{subsec:shmem_put_signal}. % -\item Added support for nonblocking put-with-signal functions. +\item Added support for nonblocking \OPR{put-with-signal} functions. \\ See Section \ref{subsec:shmem_put_signal_nbi}. 
% \item Clarified that point-to-point synchronization routines preserve the From 7b7d3f533919463bf874fb99b77861820bbdd25c Mon Sep 17 00:00:00 2001 From: James Dinan Date: Fri, 11 Oct 2019 10:52:14 -0400 Subject: [PATCH 292/319] Verb tense corrections from @nspark Signed-off-by: James Dinan --- content/shmem_atomic_fetch_and_nbi.tex | 2 +- content/shmem_atomic_fetch_inc_nbi.tex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_atomic_fetch_and_nbi.tex b/content/shmem_atomic_fetch_and_nbi.tex index 953e0ce67..925d75be5 100644 --- a/content/shmem_atomic_fetch_and_nbi.tex +++ b/content/shmem_atomic_fetch_and_nbi.tex @@ -40,7 +40,7 @@ returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, these routines have performed a fetching bitwise AND on - \VAR{dest} at \ac{PE} \VAR{pe} with the operand \VAR{value} and fetch the prior + \VAR{dest} at \ac{PE} \VAR{pe} with the operand \VAR{value} and fetched the prior contents of \VAR{dest} into the \VAR{fetch} local data object. } diff --git a/content/shmem_atomic_fetch_inc_nbi.tex b/content/shmem_atomic_fetch_inc_nbi.tex index 5f895528b..c960c5efd 100644 --- a/content/shmem_atomic_fetch_inc_nbi.tex +++ b/content/shmem_atomic_fetch_inc_nbi.tex @@ -38,7 +38,7 @@ atomic fetch-and-increment operation. This routine returns after initiating the operation. The operation is considered complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion of \FUNC{shmem\_quiet}, \dest{} on - \ac{PE} \VAR{pe} has been increased by one and the previous contents of \dest{} are + \ac{PE} \VAR{pe} has been increased by one and the previous contents of \dest{} fetched into the \VAR{fetch} local data object. 
} From e1d50954b9d061f3d5e26e28da80ba47df5110a2 Mon Sep 17 00:00:00 2001 From: Min Si Date: Tue, 29 Jan 2019 13:49:36 -0600 Subject: [PATCH 293/319] Rewrite the interoperability annex --- content/backmatter.tex | 200 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 2a36fa819..f2ce991e3 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -184,7 +184,207 @@ \chapter{Undefined Behavior in OpenSHMEM}\label{sec:undefined} \end{longtable} +\color{ForestGreen} +\chapter{Interoperability with other Programming Models}\label{sec:interoperability} + +OpenSHMEM routines may be used in conjunction with the routines of other +communication libraries or parallel languages in the same program. This section +describes the interoperability with other programming models including +clarification of undefined behaviors caused by mixed use of different models, +advice to \openshmem library users and developers that may improve the portability +and performance of hybrid programs, and the definition of an OpenSHMEM extension +API that queries the interoperability features provided by an \openshmem library. + + +\section{MPI Interoperability} + +\openshmem and MPI are two commonly used parallel programming models for distributed +memory systems. The user can choose to utilize both models in the same program +to efficiently and easily support various communication patterns. + +A vendor may implement the \openshmem and MPI libraries in different ways. For +instance, one may implement both \openshmem and MPI as standalone libraries +and each of them allocates and initializes fully isolated communication +resources. Consequently, an \openshmem call does not interfere with any MPI +communication in the same application. 
As the other common approach, however, +a vendor may also implement both \openshmem and MPI interfaces within the +same software system in order to share communication resource when possible. +In such a case, internal interference may occur. + +To improve interoperability and portability in \openshmem + MPI hybrid +programming, we clarify several aspects in the following subsections. + + +\subsection{Initialization} +To ensure that a hybrid program can be portably performed with different vendor +implementations, the \openshmem environment of the program must be initialized by +a call to \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread}, and be finalized by +a call to \FUNC{shmem\_finalize}; the MPI environment of the program must be initialized +by a call to \FUNC{MPI\_Init} or \FUNC{MPI\_Init\_thread}, and be finalized by a +call to \FUNC{MPI\_Finalize}. + +\apiimpnotes{ +Portable implementations of OpenSHMEM and MPI must ensure that the initialization +calls can be made in an arbitrary order within a program; the same rule also +applies to the finalization calls. A software runtime that utilizes shared +communication resource for \openshmem and MPI communication may maintain an +internal reference counter in order to ensure that the shared resource is +initialized only once, and no shared resource is released until the last +finalization call is made. +} + + +\subsection{Dynamic Process Creation and MPMD Programming} +\label{subsec:interoperability:mpmd} + +MPI defines the dynamic process model that allows creation of processes after +an MPI application has started, and provides the mechanism to establish communication +between the newly created processes and the existing MPI application. This model +can be useful when implementing a MPMD application by dynamically starting multiple +groups of processes, and each of these groups may launch a different executable +MPI program. 
The communication performed within a process group is identified by +an intracommunicator, and that performed between two process groups is identified +by an intercommunicator. The two types of communication do not interfere with +each other. + +Unlike MPI, \openshmem requires all PEs to collectively allocate and initialize +resources used by the \openshmem library before any other \openshmem routine may +be called. Thus, the dynamic process model is not supported in \openshmem. For +instance, the processes newly created by a call to \FUNC{MPI\_Comm\_spawn} cannot +join the existing \openshmem environment that was initialized by other existing +PEs. The \FUNC{shmem\_pe\_accessible} routine can be used in this scenario to +portably ensure that a remote PE is accessible via \openshmem communication. + + +\subsection{Thread Safety} +\label{subsec:interoperability:thread} +Both \openshmem and MPI define the interaction with user threads in a program +with routines that can be used for initializing and querying the thread +environment. In a hybrid program, the user can request different thread levels +at the initialization calls of \openshmem and MPI environments, however, the +returned support level provided by the \openshmem library might be different +from that returned in an \openshmem-only program. For instance, the former +initialization call in a hybrid program may initialize resource with the user +requested thread level but the supported level cannot be updated by the latter +initialization call, if the underlying software runtime of \openshmem and MPI +share the same internal communication resource. +The program should always check the \VAR{provided} thread level returned +at the corresponding initialization call to portably ensure thread support in each +communication environment. 
+ + +\subsection{Mapping Process Identification Numbers} +\label{subsec:interoperability:id} + +Similar to the PE identifier in \openshmem, MPI defines rank as the +identification number of a process in a communicator. Both \openshmem PE +and MPI rank are unique integers assigned from zero to one less than the total +number of processes. In a hybrid program, one may observe that the \openshmem +PE and the MPI rank in \VAR{MPI\_COMM\_WORLD} of a process can be equal. +This feature, however, may be provided by only some of the \openshmem and MPI +implementations (e.g., if both environments share the same underlying process +manager), and is not portably guaranteed. A portable program should always +use the standard functions in each model, i.e., \FUNC{shmem\_my\_pe} in \openshmem +and \FUNC{MPI\_Comm\_rank} in MPI, to query the process identification numbers +in each communication environment and manage the mapping of identifiers in the +program when necessary. + + +\subsection{RMA Synchronization, Ordering and Atomicity} +\label{subsec:interoperability:rma} + +Both \openshmem and MPI define similar RMA and atomic operations with additional +semantics and synchronization routines to ensure the operations' ordering and +completion. A synchronization call in \openshmem, however, does not interfere +with the outstanding operations issued in the MPI environment. For instance, +the \FUNC{shmem\_quiet} function only ensures completion of \openshmem RMA, +AMO, and memory store operations. It does not force the completion +of any MPI outstanding operations. To ensure the completion of RMA operations +in MPI, the program should use an appropriate MPI synchronization routine in the +MPI context (e.g., using \FUNC{MPI\_Win\_flush\_all} to ensure remote completion +of all outstanding operations in the passive-target mode). Similarly, \openshmem +guarantees only the atomicity of concurrent AMO operations that operate on +symmetric data with the same datatype. 
Access to the same symmetric object with +MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may result in undefined +result. + +\apiimpnotes{ +In the implementations that share the same communication resources for \openshmem +and MPI, the memory or network synchronization internally issued for one +programming model may also effect the status of operations in the other model. +Although the user program must make necessary synchronization calls for both models +in order to ensure semantics correctness, a high performance implementation may +internally avoid the later synchronization made by the other model when no +subsequent operation is issued between these two synchronization calls. +} + +\subsection{Communication Progress} +\label{subsec:interoperability:progress} + +\openshmem promises the progression of communication both with and without +\openshmem calls and requires the software progress mechanism in implementation +(e.g., a progress thread) when the hardware does not provide asynchronous communication +capabilities. In MPI, however, a weak progress semantics is applied. That is, +an MPI communication call is only guaranteed to complete in finite time. For +instance, an MPI Put may be completed only when the remote process makes an MPI +call which internally triggers the progress of MPI, if the underlying hardware +does not support asynchronous communication. A portable hybrid program +should not assume that a call to the \openshmem library also makes progress for MPI, +and it may have to explicitly manage the asynchronous communication in MPI in +order to prevent any deadlock or performance degradation. + +\apiimpnotes{ +Implementations that provide both \openshmem and MPI interfaces should try +to ensure progress for both models when necessary and possible, for performance +reasons. 
For instance, a high-quality implementation may start making progress for +both \openshmem and MPI whenever possible, after the user program has called +\FUNC{shmem\_init} and \FUNC{MPI\_init} provided by the same system. +} + +To avoid unnecessary overhead and programming complexity in the user program, +the \openshmem implementation may provide an extended \openshmem routine that +allows the user program to query the progress support for the MPI environment. +We introduce the definition and semantics of this routine in +Section~\ref{subsec:interoperability:query}. + + +\section{Interoperability Query API} +\label{subsec:interoperability:query} + +Determines whether an interoperability feature is supported by the \openshmem +library implementation. + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmemx\_query\_interoperability}@(int property); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{property}{The interoperability property queried by the user.} +\end{apiarguments} + +% compiling error ? +% \apidescription{ +\FUNC{shmemx\_query\_interoperability} is an extended \openshmem routine that queries +whether an interoperability property is supported by the \openshmem library. One of the +following properties can be queried in an \openshmem program after finishing the +initialization call to \openshmem and that of the relevant programming models +being used in the program. An OpenSHMEM library implementation may extend the +available properties. +\begin{itemize} + \item \VAR{SHMEM\_PROGRESS\_MPI} Query whether the \openshmem + implementation makes progress for the MPI communication used in the user program. +\end{itemize} +% } + +\apireturnvalues{ + The return value is \CONST{1} if \VAR{property} is supported by the \openshmem library; + otherwise, it is \CONST{0}. 
+} +\end{apidefinition} +\color{black} \chapter{History of OpenSHMEM}\label{sec:openshmem_history} From 34c7e86fb46cc38890c224c2a6dab969f486abb7 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 1 Apr 2019 17:08:09 -0500 Subject: [PATCH 294/319] Update dynamic process creation subsection --- content/backmatter.tex | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index f2ce991e3..8bb891cdf 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -238,22 +238,33 @@ \subsection{Dynamic Process Creation and MPMD Programming} \label{subsec:interoperability:mpmd} MPI defines the dynamic process model that allows creation of processes after -an MPI application has started, and provides the mechanism to establish communication -between the newly created processes and the existing MPI application. This model -can be useful when implementing a MPMD application by dynamically starting multiple -groups of processes, and each of these groups may launch a different executable -MPI program. The communication performed within a process group is identified by -an intracommunicator, and that performed between two process groups is identified -by an intercommunicator. The two types of communication do not interfere with -each other. - +an MPI application has started (e.g., by calling \FUNC{MPI\_Comm\_spawn}), +and provides the mechanism to establish communication +between the newly created processes and the existing MPI application (see +MPI standard version 3.1, Chapter 10). Unlike MPI, \openshmem requires all PEs to collectively allocate and initialize resources used by the \openshmem library before any other \openshmem routine may -be called. Thus, the dynamic process model is not supported in \openshmem. 
For -instance, the processes newly created by a call to \FUNC{MPI\_Comm\_spawn} cannot -join the existing \openshmem environment that was initialized by other existing -PEs. The \FUNC{shmem\_pe\_accessible} routine can be used in this scenario to -portably ensure that a remote PE is accessible via \openshmem communication. +be called. Hence, attention must be paid when using \openshmem together with the +MPI dynamic process routines. Specifically, we clarify the following three scenarios: + +\begin{enumerate} +\item After MPI initialization and before any PEs start \openshmem initialization, +it is implementation defined whether processes created by a call to an MPI dynamic +process routine are able to join the call to \FUNC{shmem\_init} or +\FUNC{shmem\_init\_thread} and establish the same \openshmem environment together +with other existing PEs. + +\item After \openshmem initialization, a process newly created by +an MPI dynamic process routine cannot join the existing \openshmem environment +that was initialized by other existing PEs. The \FUNC{shmem\_pe\_accessible} routine +may be used in this scenario to portably ensure that a remote PE is accessible +via \openshmem communication. + +\item After \openshmem initialization, it is implementation defined whether +processes newly created by an MPI dynamic process routine can make a call to +\FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} and establish a separate +\openshmem environment. 
+\end{enumerate} \subsection{Thread Safety} From afdc69af1a0539c8228a19a3e8ce8beb83bc29b6 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 1 Apr 2019 17:19:32 -0500 Subject: [PATCH 295/319] Typo fix and minor word adjustment --- content/backmatter.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 8bb891cdf..3079e5a43 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -322,7 +322,7 @@ \subsection{RMA Synchronization, Ordering and Atomicity} \apiimpnotes{ In the implementations that share the same communication resources for \openshmem and MPI, the memory or network synchronization internally issued for one -programming model may also effect the status of operations in the other model. +programming model may also affect the status of operations in the other model. Although the user program must make necessary synchronization calls for both models in order to ensure semantics correctness, a high performance implementation may internally avoid the later synchronization made by the other model when no @@ -347,7 +347,7 @@ \subsection{Communication Progress} \apiimpnotes{ Implementations that provide both \openshmem and MPI interfaces should try to ensure progress for both models when necessary and possible, for performance -reasons. For instance, a high-quality implementation may start making progress for +reasons. For instance, an implementation may start making progress for both \openshmem and MPI whenever possible, after the user program has called \FUNC{shmem\_init} and \FUNC{MPI\_init} provided by the same system. 
} From 43c537903590da2a8d00555e7f548eadd9d97baf Mon Sep 17 00:00:00 2001 From: Min Si Date: Tue, 2 Apr 2019 13:53:52 -0500 Subject: [PATCH 296/319] Add more details in RMA semantics subsection --- content/backmatter.tex | 61 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 3079e5a43..121962d07 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -301,23 +301,68 @@ \subsection{Mapping Process Identification Numbers} program when necessary. -\subsection{RMA Synchronization, Ordering and Atomicity} +\subsection{RMA Memory Semantics, Completion, Ordering and Atomicity} \label{subsec:interoperability:rma} -Both \openshmem and MPI define similar RMA and atomic operations with additional -semantics and synchronization routines to ensure the operations' ordering and -completion. A synchronization call in \openshmem, however, does not interfere -with the outstanding operations issued in the MPI environment. For instance, +Both \openshmem and MPI define similar RMA and atomic operations for remote memory +access, however, each model defines different semantics for memory synchronization, +operation completion, ordering, and atomicity. +We clarify the semantics differences and interoperability of these two models +as below. + +\begin{itemize} + +\item Memory Semantics: MPI defines the concept of public and private copies +for each RMA window. Any remote RMA operation can access only the +public copy of that window, and memory load\slash store can access only the +private copy. MPI defines two memory models for memory +synchronization between the copies: RMA separate and RMA unified (see definition +in MPI standard version 3.1, Section 11.4), and requires additional RMA +synchronization call to ensure consistent view on memory in each memory model +(see requirement of RMA synchronization in MPI standard version 3.1, Section 11.7). 
+Unlike MPI, the memory model in \openshmem is implicit. +However, additional synchronization is still required to ensure consistent view +between remote memory access and memory load\slash store (e.g., \FUNC{shmem\_barrier}). + +To ensure portability, a hybrid program should always make appropriate \openshmem +and MPI synchronization calls for remote access in each environment respectively +in order to ensure any remote updates are visible to the target PE +and also become visible to other remote access operations. For instance, a program +can make a call to \FUNC{shmem\_barrier} on both local and target PEs after +a \FUNC{shmem\_put} operation in order to ensure the remote update is visible to +the target PE, and then make a call to \FUNC{MPI\_Win\_sync} on the target +PE before the data can be accessed by other PEs using MPI RMA operations. + +\item Completion: Unlike \openshmem RMA operations, all MPI RMA communication +operations including the atomic operations such as \FUNC{MPI\_Accumulate} are +nonblocking. Similar to \openshmem nonblocking RMA, the program should perform +additional MPI synchronization to ensure any local buffers involved in the outstanding +MPI RMA operations can be safely reused (see definition of MPI RMA synchronization +in MPI standard version 3.1, Section 11.5). +A synchronization call in \openshmem, however, does not interfere +with any outstanding operations issued in the MPI environment. For instance, the \FUNC{shmem\_quiet} function only ensures completion of \openshmem RMA, AMO, and memory store operations. It does not force the completion of any MPI outstanding operations. To ensure the completion of RMA operations in MPI, the program should use an appropriate MPI synchronization routine in the MPI context (e.g., using \FUNC{MPI\_Win\_flush\_all} to ensure remote completion -of all outstanding operations in the passive-target mode). Similarly, \openshmem +of all outstanding operations in the passive-target mode). 
+ +\item Ordering: Unlike \openshmem ordering semantics, MPI does not ensure the +ordering of {\PUT} and {\GET} operations, however, it guarantees ordering between +MPI atomic operations from one process to the same (or overlapping) memory +locations at another process via the same window. A call to \FUNC{shmem\_fence} +forces neither ordering of any MPI operations, nor ordering between outstanding +MPI operations +and \openshmem operations. + +\item Atomicity: \openshmem guarantees only the atomicity of concurrent AMO operations that operate on symmetric data with the same datatype. Access to the same symmetric object with -MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may result in undefined -result. +MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may result in +undefined result. + +\end{itemize} \apiimpnotes{ In the implementations that share the same communication resources for \openshmem From c055ca476d2a2f84ab5eb5cf01693656ff02825f Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 20 May 2019 16:46:33 -0500 Subject: [PATCH 297/319] Made a pass by English editor --- content/backmatter.tex | 64 ++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 121962d07..3ed7fca82 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -189,26 +189,26 @@ \chapter{Interoperability with other Programming Models}\label{sec:interoperabil OpenSHMEM routines may be used in conjunction with the routines of other communication libraries or parallel languages in the same program. 
This section -describes the interoperability with other programming models including +describes the interoperability with other programming models, including clarification of undefined behaviors caused by mixed use of different models, advice to \openshmem library users and developers that may improve the portability -and performance of hybrid programs, and the definition of an OpenSHMEM extension +and performance of hybrid programs, and definition of an OpenSHMEM extension API that queries the interoperability features provided by an \openshmem library. \section{MPI Interoperability} -\openshmem and MPI are two commonly used parallel programming models for distributed -memory systems. The user can choose to utilize both models in the same program +\openshmem and MPI are two commonly used parallel programming models for +distributed-memory systems. The user can choose to utilize both models in the same program to efficiently and easily support various communication patterns. A vendor may implement the \openshmem and MPI libraries in different ways. For -instance, one may implement both \openshmem and MPI as standalone libraries -and each of them allocates and initializes fully isolated communication +instance, one may implement both \openshmem and MPI as standalone libraries, +each of which allocates and initializes fully isolated communication resources. Consequently, an \openshmem call does not interfere with any MPI communication in the same application. As the other common approach, however, -a vendor may also implement both \openshmem and MPI interfaces within the -same software system in order to share communication resource when possible. +a vendor may implement both \openshmem and MPI interfaces within the +same software system in order to share a communication resource when possible. In such a case, internal interference may occur. 
To improve interoperability and portability in \openshmem + MPI hybrid @@ -218,18 +218,18 @@ \section{MPI Interoperability} \subsection{Initialization} To ensure that a hybrid program can be portably performed with different vendor implementations, the \openshmem environment of the program must be initialized by -a call to \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread}, and be finalized by +a call to \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} and be finalized by a call to \FUNC{shmem\_finalize}; the MPI environment of the program must be initialized -by a call to \FUNC{MPI\_Init} or \FUNC{MPI\_Init\_thread}, and be finalized by a +by a call to \FUNC{MPI\_Init} or \FUNC{MPI\_Init\_thread} and be finalized by a call to \FUNC{MPI\_Finalize}. \apiimpnotes{ Portable implementations of OpenSHMEM and MPI must ensure that the initialization calls can be made in an arbitrary order within a program; the same rule also -applies to the finalization calls. A software runtime that utilizes shared +applies to the finalization calls. A software runtime that utilizes a shared communication resource for \openshmem and MPI communication may maintain an internal reference counter in order to ensure that the shared resource is -initialized only once, and no shared resource is released until the last +initialized only once and thus no shared resource is released until the last finalization call is made. 
} @@ -237,9 +237,11 @@ \subsection{Initialization} \subsection{Dynamic Process Creation and MPMD Programming} \label{subsec:interoperability:mpmd} -MPI defines the dynamic process model that allows creation of processes after -an MPI application has started (e.g., by calling \FUNC{MPI\_Comm\_spawn}), -and provides the mechanism to establish communication +MPI defines a dynamic process model that allows creation of processes after +an MPI application has started (e.g., by calling \FUNC{MPI\_Comm\_spawn}) and +connection to independent processes (e.g., through \FUNC{MPI\_Comm\_accept} +and \FUNC{MPI\_Comm\_connect}) +and provides a mechanism to establish communication between the newly created processes and the existing MPI application (see MPI standard version 3.1, Chapter 10). Unlike MPI, \openshmem requires all PEs to collectively allocate and initialize @@ -272,12 +274,12 @@ \subsection{Thread Safety} Both \openshmem and MPI define the interaction with user threads in a program with routines that can be used for initializing and querying the thread environment. In a hybrid program, the user can request different thread levels -at the initialization calls of \openshmem and MPI environments, however, the +at the initialization calls of \openshmem and MPI environments; however, the returned support level provided by the \openshmem library might be different from that returned in an \openshmem-only program. For instance, the former -initialization call in a hybrid program may initialize resource with the user -requested thread level but the supported level cannot be updated by the latter -initialization call, if the underlying software runtime of \openshmem and MPI +initialization call in a hybrid program may initialize a resource with the +user-requested thread level, but the supported level cannot be updated by the latter +initialization call if the underlying software runtime of \openshmem and MPI share the same internal communication resource. 
The program should always check the \VAR{provided} thread level returned at the corresponding initialization call to portably ensure thread support in each @@ -290,18 +292,18 @@ \subsection{Mapping Process Identification Numbers} Similar to the PE identifier in \openshmem, MPI defines rank as the identification number of a process in a communicator. Both \openshmem PE and MPI rank are unique integers assigned from zero to one less than the total -number of processes. In a hybrid program, one may observe that the \openshmem +number of processes. In a hybrid program, the \openshmem PE and the MPI rank in \VAR{MPI\_COMM\_WORLD} of a process can be equal. This feature, however, may be provided by only some of the \openshmem and MPI implementations (e.g., if both environments share the same underlying process -manager), and is not portably guaranteed. A portable program should always -use the standard functions in each model, i.e., \FUNC{shmem\_my\_pe} in \openshmem +manager) and is not portably guaranteed. A portable program should always +use the standard functions in each model, namely, \FUNC{shmem\_my\_pe} in \openshmem and \FUNC{MPI\_Comm\_rank} in MPI, to query the process identification numbers in each communication environment and manage the mapping of identifiers in the program when necessary. -\subsection{RMA Memory Semantics, Completion, Ordering and Atomicity} +\subsection{RMA Memory Semantics, Completion, Ordering, and Atomicity} \label{subsec:interoperability:rma} Both \openshmem and MPI define similar RMA and atomic operations for remote memory @@ -341,7 +343,7 @@ \subsection{RMA Memory Semantics, Completion, Ordering and Atomicity} in MPI standard version 3.1, Section 11.5). A synchronization call in \openshmem, however, does not interfere with any outstanding operations issued in the MPI environment. 
For instance, -the \FUNC{shmem\_quiet} function only ensures completion of \openshmem RMA, +the \FUNC{shmem\_quiet} function ensures completion only of \openshmem RMA, AMO, and memory store operations. It does not force the completion of any MPI outstanding operations. To ensure the completion of RMA operations in MPI, the program should use an appropriate MPI synchronization routine in the @@ -357,9 +359,9 @@ \subsection{RMA Memory Semantics, Completion, Ordering and Atomicity} and \openshmem operations. \item Atomicity: \openshmem -guarantees only the atomicity of concurrent AMO operations that operate on +guarantees the atomicity only of concurrent AMO operations that operate on symmetric data with the same datatype. Access to the same symmetric object with -MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may result in +MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may result in an undefined result. \end{itemize} @@ -369,7 +371,7 @@ \subsection{RMA Memory Semantics, Completion, Ordering and Atomicity} and MPI, the memory or network synchronization internally issued for one programming model may also affect the status of operations in the other model. Although the user program must make necessary synchronization calls for both models -in order to ensure semantics correctness, a high performance implementation may +in order to ensure semantics correctness, a high-performance implementation may internally avoid the later synchronization made by the other model when no subsequent operation is issued between these two synchronization calls. 
} @@ -378,12 +380,12 @@ \subsection{Communication Progress} \label{subsec:interoperability:progress} \openshmem promises the progression of communication both with and without -\openshmem calls and requires the software progress mechanism in implementation +\openshmem calls and requires the software progress mechanism in the implementation (e.g., a progress thread) when the hardware does not provide asynchronous communication capabilities. In MPI, however, a weak progress semantics is applied. That is, -an MPI communication call is only guaranteed to complete in finite time. For +an MPI communication call is guaranteed only to complete in finite time. For instance, an MPI Put may be completed only when the remote process makes an MPI -call which internally triggers the progress of MPI, if the underlying hardware +call that internally triggers the progress of MPI, if the underlying hardware does not support asynchronous communication. A portable hybrid program should not assume that a call to the \openshmem library also makes progress for MPI, and it may have to explicitly manage the asynchronous communication in MPI in @@ -424,7 +426,7 @@ \section{Interoperability Query API} % \apidescription{ \FUNC{shmemx\_query\_interoperability} is an extended \openshmem routine that queries whether an interoperability property is supported by the \openshmem library. One of the -following property can be queried in an \openshmem program after finishing the +following properties can be queried in an \openshmem program after finishing the initialization call to \openshmem and that of the relevant programming models being used in the program. An OpenSHMEM library implementation may extend the available properties. 
From f8ebcfca1b83b2b9129fda7073129538706070ec Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 9 Sep 2019 11:42:56 -0500 Subject: [PATCH 298/319] Fix function format --- content/backmatter.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 3ed7fca82..db3294e87 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -384,7 +384,7 @@ \subsection{Communication Progress} (e.g., a progress thread) when the hardware does not provide asynchronous communication capabilities. In MPI, however, a weak progress semantics is applied. That is, an MPI communication call is guaranteed only to complete in finite time. For -instance, an MPI Put may be completed only when the remote process makes an MPI +instance, an \FUNC{MPI\_Put} may be completed only when the remote process makes an MPI call that internally triggers the progress of MPI, if the underlying hardware does not support asynchronous communication. A portable hybrid program should not assume that a call to the \openshmem library also makes progress for MPI, From 63eef1db3c873a0021c08ea3432012ff10f89908 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 9 Sep 2019 11:43:43 -0500 Subject: [PATCH 299/319] Change query API to shmem_ and move text into separate file --- content/backmatter.tex | 50 +++++------------------- content/shmem_query_interoperability.tex | 39 ++++++++++++++++++ 2 files changed, 49 insertions(+), 40 deletions(-) create mode 100644 content/shmem_query_interoperability.tex diff --git a/content/backmatter.tex b/content/backmatter.tex index db3294e87..446ae6a64 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -192,7 +192,7 @@ \chapter{Interoperability with other Programming Models}\label{sec:interoperabil describes the interoperability with other programming models, including clarification of undefined behaviors caused by mixed use of different models, advice to \openshmem library users and developers that 
may improve the portability -and performance of hybrid programs, and definition of an OpenSHMEM extension +and performance of hybrid programs, and definition of an OpenSHMEM API that queries the interoperability features provided by an \openshmem library. @@ -399,49 +399,19 @@ \subsection{Communication Progress} \FUNC{shmem\_init} and \FUNC{MPI\_init} provided by the same system. } -To avoid unnecessary overhead and programming complexity in the user program, -the \openshmem implementation may provide an extended \openshmem routine that -allows the user program to query the progress support for the MPI environment. -We introduce the definition and semantics of this routine in -Section~\ref{subsec:interoperability:query}. +\section{Query Interoperability} -\section{Interoperability Query API} -\label{subsec:interoperability:query} - -Determines whether an interoperability feature is supported by the \openshmem -library implementation. - -\begin{apidefinition} - -\begin{Csynopsis} -int @\FuncDecl{shmemx\_query\_interoperability}@(int property); -\end{Csynopsis} - -\begin{apiarguments} - \apiargument{IN}{property}{The interoperability property queried by the user.} -\end{apiarguments} +A hybrid user program can query the interoperability feature of an \openshmem +implementation in order to avoid unnecessary overhead and programming complexity. +For instance, the user program can eliminate manual progress polling for MPI +communication if the underlying software runtime guarantees the progression of +communication also for MPI even without explicit function calls. -% compiling error ? -% \apidescription{ -\FUNC{shmemx\_query\_interoperability} is an extended \openshmem routine that queries -whether an interoperability property is supported by the \openshmem library. One of the -following properties can be queried in an \openshmem program after finishing the -initialization call to \openshmem and that of the relevant programming models -being used in the program. 
An OpenSHMEM library implementation may extend the -available properties. - -\begin{itemize} - \item \VAR{SHMEM\_PROGRESS\_MPI} Query whether the \openshmem - implementation makes progress for the MPI communication used in the user program. -\end{itemize} -% } +\subsection{\textbf{SHMEM\_QUERY\_INTEROPERABILITY}} +\label{subsec:interoperability:query} +\input{content/shmem_query_interoperability} -\apireturnvalues{ - The return value is \CONST{1} if \VAR{property} is supported by the \openshmem library; - otherwise, it is \CONST{0}. -} -\end{apidefinition} \color{black} \chapter{History of OpenSHMEM}\label{sec:openshmem_history} diff --git a/content/shmem_query_interoperability.tex b/content/shmem_query_interoperability.tex new file mode 100644 index 000000000..8af1e26ca --- /dev/null +++ b/content/shmem_query_interoperability.tex @@ -0,0 +1,39 @@ +\apisummary{ + Determines whether an interoperability feature is supported by the \openshmem + library implementation. +} +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmem\_query\_interoperability}@(int property); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{property}{The interoperability property queried by the user.} +\end{apiarguments} + +% compiling error ? +% \apidescription{ +\FUNC{shmem\_query\_interoperability} queries whether an interoperability property +is supported by the \openshmem library. One of the following properties can be +queried in an \openshmem program after finishing the +initialization call to \openshmem and that of the relevant programming models +being used in the program. An \openshmem library implementation may extend the +available properties. + +\begin{itemize} +\item \VAR{SHMEM\_PROGRESS\_MPI} Query whether the \openshmem +implementation makes progress for the MPI communication used in the user program. 
+\end{itemize} +% } + +\apireturnvalues{ + The return value is \CONST{1} if \VAR{property} is supported by the \openshmem library; + otherwise, it is \CONST{0}. +} +\end{apidefinition} + +\apiimpnotes{ +Implementations that do not support interoperability with other programming models +may simply return \CONST{0} for the relevant interoperability query. +} From 325957a1c2e4cbd00f4b6a303279231997a96828 Mon Sep 17 00:00:00 2001 From: Min Si Date: Tue, 10 Sep 2019 06:37:30 -0500 Subject: [PATCH 300/319] Add example code for pe mapping --- content/backmatter.tex | 7 ++++++ example_code/hybrid_mpi_mapping_id.c | 36 ++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 example_code/hybrid_mpi_mapping_id.c diff --git a/content/backmatter.tex b/content/backmatter.tex index 446ae6a64..8a5b833be 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -302,6 +302,13 @@ \subsection{Mapping Process Identification Numbers} in each communication environment and manage the mapping of identifiers in the program when necessary. +\subsubsection{Example} +The following example demonstrates how to manage the mapping of process +identifiers in a hybrid \openshmem and MPI program. 
+ +\lstinputlisting[language={C}, tabsize=2, + basicstyle=\ttfamily\footnotesize] + {example_code/hybrid_mpi_mapping_id.c} \subsection{RMA Memory Semantics, Completion, Ordering, and Atomicity} \label{subsec:interoperability:rma} diff --git a/example_code/hybrid_mpi_mapping_id.c b/example_code/hybrid_mpi_mapping_id.c new file mode 100644 index 000000000..9720ce94f --- /dev/null +++ b/example_code/hybrid_mpi_mapping_id.c @@ -0,0 +1,36 @@ +#include <stdio.h> +#include <stdlib.h> +#include <shmem.h> +#include <mpi.h> + +int main(int argc, char *argv[]) +{ + static long pSync[SHMEM_COLLECT_SYNC_SIZE]; + for (int i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) + pSync[i] = SHMEM_SYNC_VALUE; + + MPI_Init(&argc, &argv); + shmem_init(); + + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + + static int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int *mpi_ranks = shmem_calloc(npes, sizeof(int)); + + shmem_barrier_all(); + shmem_collect32(mpi_ranks, &myrank, 1, 0, 0, npes, pSync); + + if (mype == 0) + for (int i = 0; i < npes; i++) + printf("PE %d's MPI rank is %d\n", i, mpi_ranks[i]); + + shmem_free(mpi_ranks); + + shmem_finalize(); + MPI_Finalize(); + + return 0; +} From 348d60b9da3cc44a932afd32521fe4624cd80a30 Mon Sep 17 00:00:00 2001 From: Min Si Date: Tue, 10 Sep 2019 06:47:44 -0500 Subject: [PATCH 301/319] Minor text adjustment --- content/backmatter.tex | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 8a5b833be..88e240da2 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -212,7 +212,7 @@ \section{MPI Interoperability} In such a case, internal interference may occur. To improve interoperability and portability in \openshmem + MPI hybrid -programming, we clarify several aspects in the following subsections. +programming, we clarify the relevant semantics in the following subsections.
\subsection{Initialization} @@ -282,8 +282,9 @@ \subsection{Thread Safety} initialization call if the underlying software runtime of \openshmem and MPI share the same internal communication resource. The program should always check the \VAR{provided} thread level returned -at the corresponding initialization call to portably ensure thread support in each -communication environment. +at the corresponding initialization call or query the level of thread support +after initialization to portably ensure thread support in each communication +environment. \subsection{Mapping Process Identification Numbers} @@ -303,8 +304,10 @@ \subsection{Mapping Process Identification Numbers} program when necessary. \subsubsection{Example} -The following example demonstrates how to manage the mapping of process -identifiers in a hybrid \openshmem and MPI program. +\label{subsubsec:interoperability:id:example} +The following example demonstrates how to manage the mapping between \openshmem +PE identifiers and MPI ranks in \VAR{MPI\_COMM\_WORLD} in a hybrid \openshmem +and MPI program. \lstinputlisting[language={C}, tabsize=2, basicstyle=\ttfamily\footnotesize] From c089a75441df75e534adc34a6e36073389d07694 Mon Sep 17 00:00:00 2001 From: Min Si Date: Tue, 10 Sep 2019 10:51:01 -0500 Subject: [PATCH 302/319] Simplified version of dynamic process and rma sections --- content/backmatter.tex | 97 ++++++++---------------------------------- 1 file changed, 18 insertions(+), 79 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 88e240da2..9f552d21e 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -243,30 +243,13 @@ \subsection{Dynamic Process Creation and MPMD Programming} and \FUNC{MPI\_Comm\_connect}) and provides a mechanism to establish communication between the newly created processes and the existing MPI application (see -MPI standard version 3.1, Chapter 10).
-Unlike MPI, \openshmem requires all PEs to collectively allocate and initialize +MPI standard version 3.1, Chapter 10). The dynamic process model can be used to +implement a Multiple Program Multiple Data (MPMD) style program. +Unlike MPI, \openshmem follows the SPMD programming model. It starts +all processes at once and requires all PEs to collectively allocate and initialize resources used by the \openshmem library before any other \openshmem routine may -be called. Hence, attention must be paid when using \openshmem together with the -MPI dynamic process routines. Specifically, we clarify the following three scenarios: - -\begin{enumerate} -\item After MPI initialization and before any PEs start \openshmem initialization, -it is implementation defined whether processes created by a call to MPI dynamic -process routine are able to join the call to \FUNC{shmem\_init} or -\FUNC{shmem\_init\_thread} and establish the same \openshmem environment together -with other existing PEs. - -\item After \openshmem initialization, a process newly created by -the MPI dynamic process routine cannot join the existing \openshmem environment -that was initialized by other existing PEs. The \FUNC{shmem\_pe\_accessible} routine -may be used in this scenario to portably ensure that a remote PE is accessible -via \openshmem communication. - -\item After \openshmem initialization, it is implementation defined whether -processes newly created by MPI dynamic process routine can make a call to -\FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} and establish a separate -\openshmem environment. -\end{enumerate} +be called. Hence, users should avoid using \openshmem and the MPI dynamic process model +in the same program.
\subsection{Thread Safety} @@ -313,68 +296,24 @@ \subsubsection{Example} basicstyle=\ttfamily\footnotesize] {example_code/hybrid_mpi_mapping_id.c} -\subsection{RMA Memory Semantics, Completion, Ordering, and Atomicity} +\subsection{RMA Programming Models} \label{subsec:interoperability:rma} Both \openshmem and MPI define similar RMA and atomic operations for remote memory -access, however, each model defines different semantics for memory synchronization, -operation completion, ordering, and atomicity. -We clarify the semantics differences and interoperability of these two models -as below. +access; however, each model defines different semantics and functions for memory +synchronization, operation completion, and ordering. To ensure semantics correctness +and portability, a hybrid program should always make appropriate \openshmem and MPI +synchronization calls for remote access in each environment, respectively. -\begin{itemize} +\openshmem guarantees the atomicity only of concurrent \openshmem AMO operations +that operate on symmetric data with the same datatype. Access to the same symmetric +object with MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may +produce an undefined result. -\item Memory Semantics: MPI defines the concept of public and private copies -for each RMA window. Any remote RMA operation can access only the -public copy of that window, and memory load\slash store can access only the -private copy. MPI defines two memory models for memory -synchronization between the copies: RMA separate and RMA unified (see definition -in MPI standard version 3.1, Section 11.4), and requires additional RMA -synchronization call to ensure consistent view on memory in each memory model -(see requirement of RMA synchronization in MPI standard version 3.1, Section 11.7).
-However, additional synchronization is still required to ensure consistent view -between remote memory access and memory load\slash store (e.g., \FUNC{shmem\_barrier}). - -To ensure portability, a hybrid program should always make appropriate \openshmem -and MPI synchronization calls for remote access in each environment respectively -in order to ensure any remote updates are visible to the target PE -and also become visible to other remote access operations. For instance, a program -can make a call to \FUNC{shmem\_barrier} on both local and target PEs after -a \FUNC{shmem\_put} operation in order to ensure the remote update is visible to -the target PE, and then make a call to \FUNC{MPI\_Win\_sync} on the target -PE before the data can be accessed by other PEs using MPI RMA operations. - -\item Completion: Unlike \openshmem RMA operations, all MPI RMA communication -operations including the atomic operations such as \FUNC{MPI\_Accumulate} are -nonblocking. Similar to \openshmem nonblocking RMA, the program should perform -additional MPI synchronization to ensure any local buffers involved in the outstanding -MPI RMA operations can be safely reused (see definition of MPI RMA synchronization -in MPI standard version 3.1, Section 11.5). -A synchronization call in \openshmem, however, does not interfere -with any outstanding operations issued in the MPI environment. For instance, -the \FUNC{shmem\_quiet} function ensures completion only of \openshmem RMA, -AMO, and memory store operations. It does not force the completion -of any MPI outstanding operations. To ensure the completion of RMA operations -in MPI, the program should use an appropriate MPI synchronization routine in the -MPI context (e.g., using \FUNC{MPI\_Win\_flush\_all} to ensure remote completion -of all outstanding operations in the passive-target mode). 
- -\item Ordering: Unlike \openshmem ordering semantics, MPI does not ensure the -ordering of {\PUT} and {\GET} operations, however, it guarantees ordering between -MPI atomic operations from one process to the same (or overlapping) memory -locations at another process via the same window. A call to \FUNC{shmem\_fence} -forces neither ordering of any MPI operations, nor ordering between outstanding -MPI operations -and \openshmem operations. - -\item Atomicity: \openshmem -guarantees the atomicity only of concurrent AMO operations that operate on -symmetric data with the same datatype. Access to the same symmetric object with -MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may result in an -undefined result. +Most RMA programs can be written using either \openshmem or MPI RMA. +It is recommended to choose only one of the RMA models in the same program, whenever +possible, for performance and code simplicity. -\end{itemize} \apiimpnotes{ In the implementations that share the same communication resources for \openshmem From d754e7a6c4d82a7aaa55aa8bba0fcf910bd40322 Mon Sep 17 00:00:00 2001 From: Min Si Date: Thu, 12 Sep 2019 13:25:08 -0500 Subject: [PATCH 303/319] Do not mention interference in first paragraph --- content/backmatter.tex | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 9f552d21e..97b19299a 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -205,11 +205,10 @@ \section{MPI Interoperability} A vendor may implement the \openshmem and MPI libraries in different ways. For instance, one may implement both \openshmem and MPI as standalone libraries, each of which allocates and initializes fully isolated communication -resources. Consequently, an \openshmem call does not interfere with any MPI -communication in the same application. As the other common approach, however, +resources. 
+As the other common approach, however, a vendor may implement both \openshmem and MPI interfaces within the same software system in order to share a communication resource when possible. -In such a case, internal interference may occur. To improve interoperability and portability in \openshmem + MPI hybrid programming, we clarify the relevant semantics in the following subsections. From b1791324076e9c87d31cff49290c0f9eeb285b71 Mon Sep 17 00:00:00 2001 From: Min Si Date: Thu, 12 Sep 2019 13:27:58 -0500 Subject: [PATCH 304/319] interop/mpmd: strong advice to not use dynamic process with shmem --- content/backmatter.tex | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 97b19299a..d1e7b18c7 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -242,12 +242,13 @@ \subsection{Dynamic Process Creation and MPMD Programming} and \FUNC{MPI\_Comm\_connect}) and provides a mechanism to establish communication between the newly created processes and the existing MPI application (see -MPI standard version 3.1, Chapter 10). The dynamic process model can be used to -implement Multiple Program Multiple Data (MPMD) style program. -Unlike MPI, \openshmem follows the SPMD programming model. It starts -all processes at once and requires all PEs to collectively allocate and initialize -resources used by the \openshmem library before any other \openshmem routine may -be called. Hence, users should avoid using \openshmem and MPI dynamic process model +MPI standard version 3.1, Chapter 10). +Unlike MPI, \openshmem starts all processes at once and requires all PEs to +collectively allocate and initialize resources (e.g., symmetric heap) used by +the \openshmem library before any other \openshmem routine may +be called. Communicating with a dynamically created process in the \openshmem +environment may result in undefined behavior. 
+Hence, users should not use \openshmem and MPI dynamic process model in the same program. From 56db04b27be33c31536be1c24d0284fb6048c080 Mon Sep 17 00:00:00 2001 From: Min Si Date: Tue, 24 Sep 2019 23:35:37 -0400 Subject: [PATCH 305/319] interop/rma: simply ask user to avoid using both RMA models --- content/backmatter.tex | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index d1e7b18c7..a8e879bf1 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -300,31 +300,16 @@ \subsection{RMA Programming Models} \label{subsec:interoperability:rma} Both \openshmem and MPI define similar RMA and atomic operations for remote memory -access, however, each model defines different semantics and functions for memory -synchronization, operation completion, and ordering. To ensure semantics correctness -and portability, a hybrid program should always make appropriate \openshmem and MPI -synchronization calls for remote access in each environment respectively. - -\openshmem guarantees the atomicity only of concurrent \openshmem AMO operations +access, however, a portable program should not assume interoperability between these +two RMA models. +For instance, \openshmem guarantees the atomicity only of concurrent \openshmem AMO operations that operate on symmetric data with the same datatype. Access to the same symmetric object with MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may -result in an undefined result. - -Most RMA programs can be written using either \openshmem or MPI RMA. -It is recommended to choose only one of the RMA models in the same program, whenever +result in an undefined result. Furthermore, +because most RMA programs can be written using either \openshmem or MPI RMA, +users should choose only one of the RMA models in the same program, whenever possible, for performance and code simplicity. 
- -\apiimpnotes{ -In the implementations that share the same communication resources for \openshmem -and MPI, the memory or network synchronization internally issued for one -programming model may also affect the status of operations in the other model. -Although the user program must make necessary synchronization calls for both models -in order to ensure semantics correctness, a high-performance implementation may -internally avoid the later synchronization made by the other model when no -subsequent operation is issued between these two synchronization calls. -} - \subsection{Communication Progress} \label{subsec:interoperability:progress} From ab532fbfc6416ef54a44460a3bfe93d5de838b8e Mon Sep 17 00:00:00 2001 From: Min Si Date: Wed, 25 Sep 2019 06:36:55 -0400 Subject: [PATCH 306/319] interop/progress: mention query api to connect paragraphs --- content/backmatter.tex | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index a8e879bf1..7c4ac8642 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -321,8 +321,12 @@ \subsection{Communication Progress} instance, an \FUNC{MPI\_Put} may be completed only when the remote process makes an MPI call that internally triggers the progress of MPI, if the underlying hardware does not support asynchronous communication. A portable hybrid program -should not assume that a call to the \openshmem library also makes progress for MPI, -and it may have to explicitly manage the asynchronous communication in MPI in +should not assume that a call to the \openshmem library also makes progress for MPI. +A call to \FUNC{shmem\_query\_interoperability} (see definition in \ref{subsec:interoperability:query}) +can be used to check whether the implementation provides such a functionality. 
+If it is provided, then the library ensures progression of +both \openshmem and MPI communication; otherwise, it may have to explicitly +manage the asynchronous communication in MPI in order to prevent any deadlock or performance degradation. \apiimpnotes{ From c57d957d2f58811db32d217f995f1b133427ef5a Mon Sep 17 00:00:00 2001 From: Min Si Date: Wed, 25 Sep 2019 09:19:35 -0400 Subject: [PATCH 307/319] interop/threads: add restriction for mixed thread levels --- content/backmatter.tex | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 7c4ac8642..15e91a033 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -256,7 +256,7 @@ \subsection{Thread Safety} \label{subsec:interoperability:thread} Both \openshmem and MPI define the interaction with user threads in a program with routines that can be used for initializing and querying the thread -environment. In a hybrid program, the user can request different thread levels +environment. In a hybrid program, the user may request different thread levels at the initialization calls of \openshmem and MPI environments; however, the returned support level provided by the \openshmem library might be different from that returned in an \openshmem-only program. For instance, the former @@ -269,6 +269,28 @@ \subsection{Thread Safety} after initialization to portably ensure thread support in each communication environment. +Both \openshmem and MPI define similar thread levels, namely, \VAR{THREAD\_SINGLE}, +\VAR{THREAD\_FUNNELED}, \VAR{THREAD\_SERIALIZED}, and \VAR{THREAD\_MULTIPLE}. +When requesting threading support in a hybrid program, however, +users should follow additional rules as described below. + +\begin{itemize} + \item The \VAR{THREAD\_SINGLE} thread level requires a single-threaded program. 
+ Hence, users should not request \VAR{THREAD\_SINGLE} at the initialization + call of either \openshmem or MPI but request a different thread level at the + initialization call of the other model in the same program. + + \item The \VAR{THREAD\_FUNNELED} thread level allows only the main thread to + make communication calls. A hybrid program using the \VAR{THREAD\_FUNNELED} + thread level in both \openshmem and MPI should ensure the same main thread + is used in both communication environments. + + \item The \VAR{THREAD\_SERIALIZED} thread level requires the program to ensure + communication calls are not made concurrently by multiple threads. A hybrid + program should ensure serialized calls to both \openshmem and MPI libraries, + if the program uses \VAR{THREAD\_SERIALIZED} in one communication environment + and \VAR{THREAD\_SERIALIZED} or \VAR{THREAD\_FUNNELED} in the other one. +\end{itemize} \subsection{Mapping Process Identification Numbers} \label{subsec:interoperability:id} From 1c6b1976c1abb076384991eb2174169064d7378b Mon Sep 17 00:00:00 2001 From: Min Si Date: Wed, 25 Sep 2019 10:46:26 -0400 Subject: [PATCH 308/319] interop/id: use sync_all instead of barrier_all in example --- example_code/hybrid_mpi_mapping_id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example_code/hybrid_mpi_mapping_id.c b/example_code/hybrid_mpi_mapping_id.c index 9720ce94f..c72168d6e 100644 --- a/example_code/hybrid_mpi_mapping_id.c +++ b/example_code/hybrid_mpi_mapping_id.c @@ -20,7 +20,7 @@ int main(int argc, char *argv[]) int *mpi_ranks = shmem_calloc(npes, sizeof(int)); - shmem_barrier_all(); + shmem_sync_all(); shmem_collect32(mpi_ranks, &myrank, 1, 0, 0, npes, pSync); if (mype == 0) From 7e1508dca56531a7e8031fdad1bb286cf96ff6ea Mon Sep 17 00:00:00 2001 From: Min Si Date: Wed, 25 Sep 2019 11:00:20 -0400 Subject: [PATCH 309/319] interop/progress: minor text adjustment --- content/backmatter.tex | 14 +++++++------- 1 file changed, 7 insertions(+), 7 
deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 15e91a033..a3ff43f7b 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -342,13 +342,13 @@ \subsection{Communication Progress} an MPI communication call is guaranteed only to complete in finite time. For instance, an \FUNC{MPI\_Put} may be completed only when the remote process makes an MPI call that internally triggers the progress of MPI, if the underlying hardware -does not support asynchronous communication. A portable hybrid program -should not assume that a call to the \openshmem library also makes progress for MPI. -A call to \FUNC{shmem\_query\_interoperability} (see definition in \ref{subsec:interoperability:query}) -can be used to check whether the implementation provides such a functionality. -If it is provided, then the library ensures progression of -both \openshmem and MPI communication; otherwise, it may have to explicitly -manage the asynchronous communication in MPI in +does not support asynchronous communication. A hybrid program +should not assume that the \openshmem library also makes progress for MPI. +A call to \FUNC{shmem\_query\_interoperability} with the \VAR{SHMEM\_PROGRESS\_MPI} +property (see definition in \ref{subsec:interoperability:query}) +can be used to portably check whether the implementation provides asynchronous +progression also for MPI. If it is not provided, the user program may have to +explicitly manage the asynchronous communication in MPI in order to prevent any deadlock or performance degradation. 
\apiimpnotes{ From e062450a0db2a3e76c271554a7e63b331ae7cca9 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 15:05:18 -0500 Subject: [PATCH 310/319] interop: move interoperability to a separate file --- content/backmatter.tex | 188 +---------------------------------- content/interoperability.tex | 186 ++++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+), 187 deletions(-) create mode 100644 content/interoperability.tex diff --git a/content/backmatter.tex b/content/backmatter.tex index a3ff43f7b..d5098e3a2 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -185,193 +185,7 @@ \chapter{Undefined Behavior in OpenSHMEM}\label{sec:undefined} \color{ForestGreen} -\chapter{Interoperability with other Programming Models}\label{sec:interoperability} - -OpenSHMEM routines may be used in conjunction with the routines of other -communication libraries or parallel languages in the same program. This section -describes the interoperability with other programming models, including -clarification of undefined behaviors caused by mixed use of different models, -advice to \openshmem library users and developers that may improve the portability -and performance of hybrid programs, and definition of an OpenSHMEM -API that queries the interoperability features provided by an \openshmem library. - - -\section{MPI Interoperability} - -\openshmem and MPI are two commonly used parallel programming models for -distributed-memory systems. The user can choose to utilize both models in the same program -to efficiently and easily support various communication patterns. - -A vendor may implement the \openshmem and MPI libraries in different ways. For -instance, one may implement both \openshmem and MPI as standalone libraries, -each of which allocates and initializes fully isolated communication -resources. 
-As the other common approach, however, -a vendor may implement both \openshmem and MPI interfaces within the -same software system in order to share a communication resource when possible. - -To improve interoperability and portability in \openshmem + MPI hybrid -programming, we clarify the relevant semantics in the following subsections. - - -\subsection{Initialization} -To ensure that a hybrid program can be portably performed with different vendor -implementations, the \openshmem environment of the program must be initialized by -a call to \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} and be finalized by -a call to \FUNC{shmem\_finalize}; the MPI environment of the program must be initialized -by a call to \FUNC{MPI\_Init} or \FUNC{MPI\_Init\_thread} and be finalized by a -call to \FUNC{MPI\_Finalize}. - -\apiimpnotes{ -Portable implementations of OpenSHMEM and MPI must ensure that the initialization -calls can be made in an arbitrary order within a program; the same rule also -applies to the finalization calls. A software runtime that utilizes a shared -communication resource for \openshmem and MPI communication may maintain an -internal reference counter in order to ensure that the shared resource is -initialized only once and thus no shared resource is released until the last -finalization call is made. -} - - -\subsection{Dynamic Process Creation and MPMD Programming} -\label{subsec:interoperability:mpmd} - -MPI defines a dynamic process model that allows creation of processes after -an MPI application has started (e.g., by calling \FUNC{MPI\_Comm\_spawn}) and -connection to independent processes (e.g., through \FUNC{MPI\_Comm\_accept} -and \FUNC{MPI\_Comm\_connect}) -and provides a mechanism to establish communication -between the newly created processes and the existing MPI application (see -MPI standard version 3.1, Chapter 10). 
-Unlike MPI, \openshmem starts all processes at once and requires all PEs to -collectively allocate and initialize resources (e.g., symmetric heap) used by -the \openshmem library before any other \openshmem routine may -be called. Communicating with a dynamically created process in the \openshmem -environment may result in undefined behavior. -Hence, users should not use \openshmem and MPI dynamic process model -in the same program. - - -\subsection{Thread Safety} -\label{subsec:interoperability:thread} -Both \openshmem and MPI define the interaction with user threads in a program -with routines that can be used for initializing and querying the thread -environment. In a hybrid program, the user may request different thread levels -at the initialization calls of \openshmem and MPI environments; however, the -returned support level provided by the \openshmem library might be different -from that returned in an \openshmem-only program. For instance, the former -initialization call in a hybrid program may initialize a resource with the -user-requested thread level, but the supported level cannot be updated by the latter -initialization call if the underlying software runtime of \openshmem and MPI -share the same internal communication resource. -The program should always check the \VAR{provided} thread level returned -at the corresponding initialization call or query the level of thread support -after initialization to portably ensure thread support in each communication -environment. - -Both \openshmem and MPI define similar thread levels, namely, \VAR{THREAD\_SINGLE}, -\VAR{THREAD\_FUNNELED}, \VAR{THREAD\_SERIALIZED}, and \VAR{THREAD\_MULTIPLE}. -When requesting threading support in a hybrid program, however, -users should follow additional rules as described below. - -\begin{itemize} - \item The \VAR{THREAD\_SINGLE} thread level requires a single-threaded program. 
- Hence, users should not request \VAR{THREAD\_SINGLE} at the initialization - call of either \openshmem or MPI but request a different thread level at the - initialization call of the other model in the same program. - - \item The \VAR{THREAD\_FUNNELED} thread level allows only the main thread to - make communication calls. A hybrid program using the \VAR{THREAD\_FUNNELED} - thread level in both \openshmem and MPI should ensure the same main thread - is used in both communication environments. - - \item The \VAR{THREAD\_SERIALIZED} thread level requires the program to ensure - communication calls are not made concurrently by multiple threads. A hybrid - program should ensure serialized calls to both \openshmem and MPI libraries, - if the program uses \VAR{THREAD\_SERIALIZED} in one communication environment - and \VAR{THREAD\_SERIALIZED} or \VAR{THREAD\_FUNNELED} in the other one. -\end{itemize} - -\subsection{Mapping Process Identification Numbers} -\label{subsec:interoperability:id} - -Similar to the PE identifier in \openshmem, MPI defines rank as the -identification number of a process in a communicator. Both \openshmem PE -and MPI rank are unique integers assigned from zero to one less than the total -number of processes. In a hybrid program, the \openshmem -PE and the MPI rank in \VAR{MPI\_COMM\_WORLD} of a process can be equal. -This feature, however, may be provided by only some of the \openshmem and MPI -implementations (e.g., if both environments share the same underlying process -manager) and is not portably guaranteed. A portable program should always -use the standard functions in each model, namely, \FUNC{shmem\_my\_pe} in \openshmem -and \FUNC{MPI\_Comm\_rank} in MPI, to query the process identification numbers -in each communication environment and manage the mapping of identifiers in the -program when necessary. 
- -\subsubsection{Example} -\label{subsubsec:interoperability:id:example} -The following example demonstrates how to manage the mapping between \openshmem -PE identifier and MPI ranks in \VAR{MPI\_COMM\_WORLD} in a hybrid \openshmem -and MPI program. - -\lstinputlisting[language={C}, tabsize=2, - basicstyle=\ttfamily\footnotesize] - {example_code/hybrid_mpi_mapping_id.c} - -\subsection{RMA Programming Models} -\label{subsec:interoperability:rma} - -Both \openshmem and MPI define similar RMA and atomic operations for remote memory -access, however, a portable program should not assume interoperability between these -two RMA models. -For instance, \openshmem guarantees the atomicity only of concurrent \openshmem AMO operations -that operate on symmetric data with the same datatype. Access to the same symmetric -object with MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may -result in an undefined result. Furthermore, -because most RMA programs can be written using either \openshmem or MPI RMA, -users should choose only one of the RMA models in the same program, whenever -possible, for performance and code simplicity. - -\subsection{Communication Progress} -\label{subsec:interoperability:progress} - -\openshmem promises the progression of communication both with and without -\openshmem calls and requires the software progress mechanism in the implementation -(e.g., a progress thread) when the hardware does not provide asynchronous communication -capabilities. In MPI, however, a weak progress semantics is applied. That is, -an MPI communication call is guaranteed only to complete in finite time. For -instance, an \FUNC{MPI\_Put} may be completed only when the remote process makes an MPI -call that internally triggers the progress of MPI, if the underlying hardware -does not support asynchronous communication. A hybrid program -should not assume that the \openshmem library also makes progress for MPI. 
-A call to \FUNC{shmem\_query\_interoperability} with the \VAR{SHMEM\_PROGRESS\_MPI} -property (see definition in \ref{subsec:interoperability:query}) -can be used to portably check whether the implementation provides asynchronous -progression also for MPI. If it is not provided, the user program may have to -explicitly manage the asynchronous communication in MPI in -order to prevent any deadlock or performance degradation. - -\apiimpnotes{ -Implementations that provide both \openshmem and MPI interfaces should try -to ensure progress for both models when necessary and possible, for performance -reasons. For instance, an implementation may start making progress for -both \openshmem and MPI whenever possible, after the user program has called -\FUNC{shmem\_init} and \FUNC{MPI\_init} provided by the same system. -} - - -\section{Query Interoperability} - -A hybrid user program can query the interoperability feature of an \openshmem -implementation in order to avoid unnecessary overhead and programming complexity. -For instance, the user program can eliminate manual progress polling for MPI -communication if the underlying software runtime guarantees the progression of -communication also for MPI even without explicit function calls. - -\subsection{\textbf{SHMEM\_QUERY\_INTEROPERABILITY}} -\label{subsec:interoperability:query} -\input{content/shmem_query_interoperability} - +\input{content/interoperability} \color{black} \chapter{History of OpenSHMEM}\label{sec:openshmem_history} diff --git a/content/interoperability.tex b/content/interoperability.tex new file mode 100644 index 000000000..7257537ab --- /dev/null +++ b/content/interoperability.tex @@ -0,0 +1,186 @@ +\chapter{Interoperability with other Programming Models}\label{sec:interoperability} + +OpenSHMEM routines may be used in conjunction with the routines of other +communication libraries or parallel languages in the same program. 
This section +describes the interoperability with other programming models, including +clarification of undefined behaviors caused by mixed use of different models, +advice to \openshmem library users and developers that may improve the portability +and performance of hybrid programs, and definition of an OpenSHMEM +API that queries the interoperability features provided by an \openshmem library. + + +\section{MPI Interoperability} + +\openshmem and MPI are two commonly used parallel programming models for +distributed-memory systems. The user can choose to utilize both models in the same program +to efficiently and easily support various communication patterns. + +A vendor may implement the \openshmem and MPI libraries in different ways. For +instance, one may implement both \openshmem and MPI as standalone libraries, +each of which allocates and initializes fully isolated communication +resources. +As the other common approach, however, +a vendor may implement both \openshmem and MPI interfaces within the +same software system in order to share a communication resource when possible. + +To improve interoperability and portability in \openshmem + MPI hybrid +programming, we clarify the relevant semantics in the following subsections. + + +\subsection{Initialization} +To ensure that a hybrid program can be portably performed with different vendor +implementations, the \openshmem environment of the program must be initialized by +a call to \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} and be finalized by +a call to \FUNC{shmem\_finalize}; the MPI environment of the program must be initialized +by a call to \FUNC{MPI\_Init} or \FUNC{MPI\_Init\_thread} and be finalized by a +call to \FUNC{MPI\_Finalize}. + +\apiimpnotes{ +Portable implementations of OpenSHMEM and MPI must ensure that the initialization +calls can be made in an arbitrary order within a program; the same rule also +applies to the finalization calls. 
A software runtime that utilizes a shared +communication resource for \openshmem and MPI communication may maintain an +internal reference counter in order to ensure that the shared resource is +initialized only once and thus no shared resource is released until the last +finalization call is made. +} + + +\subsection{Dynamic Process Creation and MPMD Programming} +\label{subsec:interoperability:mpmd} + +MPI defines a dynamic process model that allows creation of processes after +an MPI application has started (e.g., by calling \FUNC{MPI\_Comm\_spawn}) and +connection to independent processes (e.g., through \FUNC{MPI\_Comm\_accept} +and \FUNC{MPI\_Comm\_connect}) +and provides a mechanism to establish communication +between the newly created processes and the existing MPI application (see +MPI standard version 3.1, Chapter 10). +Unlike MPI, \openshmem starts all processes at once and requires all PEs to +collectively allocate and initialize resources (e.g., symmetric heap) used by +the \openshmem library before any other \openshmem routine may +be called. Communicating with a dynamically created process in the \openshmem +environment may result in undefined behavior. +Hence, users should not use \openshmem and MPI dynamic process model +in the same program. + + +\subsection{Thread Safety} +\label{subsec:interoperability:thread} +Both \openshmem and MPI define the interaction with user threads in a program +with routines that can be used for initializing and querying the thread +environment. In a hybrid program, the user may request different thread levels +at the initialization calls of \openshmem and MPI environments; however, the +returned support level provided by the \openshmem library might be different +from that returned in an \openshmem-only program. 
For instance, the former +initialization call in a hybrid program may initialize a resource with the +user-requested thread level, but the supported level cannot be updated by the latter +initialization call if the underlying software runtime of \openshmem and MPI +share the same internal communication resource. +The program should always check the \VAR{provided} thread level returned +at the corresponding initialization call or query the level of thread support +after initialization to portably ensure thread support in each communication +environment. + +Both \openshmem and MPI define similar thread levels, namely, \VAR{THREAD\_SINGLE}, +\VAR{THREAD\_FUNNELED}, \VAR{THREAD\_SERIALIZED}, and \VAR{THREAD\_MULTIPLE}. +When requesting threading support in a hybrid program, however, +users should follow additional rules as described below. + +\begin{itemize} + \item The \VAR{THREAD\_SINGLE} thread level requires a single-threaded program. + Hence, users should not request \VAR{THREAD\_SINGLE} at the initialization + call of either \openshmem or MPI but request a different thread level at the + initialization call of the other model in the same program. + + \item The \VAR{THREAD\_FUNNELED} thread level allows only the main thread to + make communication calls. A hybrid program using the \VAR{THREAD\_FUNNELED} + thread level in both \openshmem and MPI should ensure the same main thread + is used in both communication environments. + + \item The \VAR{THREAD\_SERIALIZED} thread level requires the program to ensure + communication calls are not made concurrently by multiple threads. A hybrid + program should ensure serialized calls to both \openshmem and MPI libraries, + if the program uses \VAR{THREAD\_SERIALIZED} in one communication environment + and \VAR{THREAD\_SERIALIZED} or \VAR{THREAD\_FUNNELED} in the other one. 
+\end{itemize} + +\subsection{Mapping Process Identification Numbers} +\label{subsec:interoperability:id} + +Similar to the PE identifier in \openshmem, MPI defines rank as the +identification number of a process in a communicator. Both \openshmem PE +and MPI rank are unique integers assigned from zero to one less than the total +number of processes. In a hybrid program, the \openshmem +PE and the MPI rank in \VAR{MPI\_COMM\_WORLD} of a process can be equal. +This feature, however, may be provided by only some of the \openshmem and MPI +implementations (e.g., if both environments share the same underlying process +manager) and is not portably guaranteed. A portable program should always +use the standard functions in each model, namely, \FUNC{shmem\_my\_pe} in \openshmem +and \FUNC{MPI\_Comm\_rank} in MPI, to query the process identification numbers +in each communication environment and manage the mapping of identifiers in the +program when necessary. + +\subsubsection{Example} +\label{subsubsec:interoperability:id:example} +The following example demonstrates how to manage the mapping between \openshmem +PE identifier and MPI ranks in \VAR{MPI\_COMM\_WORLD} in a hybrid \openshmem +and MPI program. + +\lstinputlisting[language={C}, tabsize=2, + basicstyle=\ttfamily\footnotesize] + {example_code/hybrid_mpi_mapping_id.c} + +\subsection{RMA Programming Models} +\label{subsec:interoperability:rma} + +Both \openshmem and MPI define similar RMA and atomic operations for remote memory +access, however, a portable program should not assume interoperability between these +two RMA models. +For instance, \openshmem guarantees the atomicity only of concurrent \openshmem AMO operations +that operate on symmetric data with the same datatype. Access to the same symmetric +object with MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may +result in an undefined result. 
Furthermore, +because most RMA programs can be written using either \openshmem or MPI RMA, +users should choose only one of the RMA models in the same program, whenever +possible, for performance and code simplicity. + +\subsection{Communication Progress} +\label{subsec:interoperability:progress} + +\openshmem promises the progression of communication both with and without +\openshmem calls and requires the software progress mechanism in the implementation +(e.g., a progress thread) when the hardware does not provide asynchronous communication +capabilities. In MPI, however, a weak progress semantics is applied. That is, +an MPI communication call is guaranteed only to complete in finite time. For +instance, an \FUNC{MPI\_Put} may be completed only when the remote process makes an MPI +call that internally triggers the progress of MPI, if the underlying hardware +does not support asynchronous communication. A hybrid program +should not assume that the \openshmem library also makes progress for MPI. +A call to \FUNC{shmem\_query\_interoperability} with the \VAR{SHMEM\_PROGRESS\_MPI} +property (see definition in \ref{subsec:interoperability:query}) +can be used to portably check whether the implementation provides asynchronous +progression also for MPI. If it is not provided, the user program may have to +explicitly manage the asynchronous communication in MPI in +order to prevent any deadlock or performance degradation. + +\apiimpnotes{ +Implementations that provide both \openshmem and MPI interfaces should try +to ensure progress for both models when necessary and possible, for performance +reasons. For instance, an implementation may start making progress for +both \openshmem and MPI whenever possible, after the user program has called +\FUNC{shmem\_init} and \FUNC{MPI\_init} provided by the same system. 
+} + + +\section{Query Interoperability} + +A hybrid user program can query the interoperability feature of an \openshmem +implementation in order to avoid unnecessary overhead and programming complexity. +For instance, the user program can eliminate manual progress polling for MPI +communication if the underlying software runtime guarantees the progression of +communication also for MPI even without explicit function calls. + +\subsection{\textbf{SHMEM\_QUERY\_INTEROPERABILITY}} +\label{subsec:interoperability:query} +\input{content/shmem_query_interoperability} \ No newline at end of file From 6c2e7b6b82b55e4e4479a97db6ce329ea96c16a3 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 15:08:38 -0500 Subject: [PATCH 311/319] interop/dynamic: delete MPMD in section title --- content/interoperability.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/interoperability.tex b/content/interoperability.tex index 7257537ab..01d2ba9eb 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -46,7 +46,7 @@ \subsection{Initialization} } -\subsection{Dynamic Process Creation and MPMD Programming} +\subsection{Dynamic Process Creation} \label{subsec:interoperability:mpmd} MPI defines a dynamic process model that allows creation of processes after From 9cb51e1dddbe5a4fe1f967275160f09ec9957da5 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 17:27:12 -0500 Subject: [PATCH 312/319] interop/threads: adjust text based on f2f meeting feedback Adjust the text to address two issues: 1. It is recommendation to users but not requirement because such constraints are valid only when the implementation provides both models. 2. The additional rule for THREAD_SERIALIZED is misleading. 
--- content/interoperability.tex | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/content/interoperability.tex b/content/interoperability.tex index 01d2ba9eb..a516328d1 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -85,7 +85,10 @@ \subsection{Thread Safety} Both \openshmem and MPI define similar thread levels, namely, \VAR{THREAD\_SINGLE}, \VAR{THREAD\_FUNNELED}, \VAR{THREAD\_SERIALIZED}, and \VAR{THREAD\_MULTIPLE}. When requesting threading support in a hybrid program, however, -users should follow additional rules as described below. +the following additional rules are applied if the implementations of \openshmem +and MPI share the same internal communication resource. +Users are strongly advised to always follow these rules to ensure program +portability. \begin{itemize} \item The \VAR{THREAD\_SINGLE} thread level requires a single-threaded program. @@ -99,10 +102,11 @@ \subsection{Thread Safety} is used in both communication environments. \item The \VAR{THREAD\_SERIALIZED} thread level requires the program to ensure - communication calls are not made concurrently by multiple threads. A hybrid - program should ensure serialized calls to both \openshmem and MPI libraries, - if the program uses \VAR{THREAD\_SERIALIZED} in one communication environment - and \VAR{THREAD\_SERIALIZED} or \VAR{THREAD\_FUNNELED} in the other one. + communication calls are not made concurrently by multiple threads. If a + hybrid program uses \VAR{THREAD\_SERIALIZED} in one communication environment + and \VAR{THREAD\_SERIALIZED} or \VAR{THREAD\_FUNNELED} in the other one, it + should also guarantee that the \openshmem and MPI calls are not made concurrently + from two distinct threads. 
\end{itemize} \subsection{Mapping Process Identification Numbers} From 683f42373b32ab763460404aa8aee9ad975a65e1 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 17:51:02 -0500 Subject: [PATCH 313/319] interop/id: fix example --- example_code/hybrid_mpi_mapping_id.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/example_code/hybrid_mpi_mapping_id.c b/example_code/hybrid_mpi_mapping_id.c index c72168d6e..1e30b3879 100644 --- a/example_code/hybrid_mpi_mapping_id.c +++ b/example_code/hybrid_mpi_mapping_id.c @@ -1,5 +1,4 @@ #include -#include #include #include @@ -20,7 +19,6 @@ int main(int argc, char *argv[]) int *mpi_ranks = shmem_calloc(npes, sizeof(int)); - shmem_sync_all(); shmem_collect32(mpi_ranks, &myrank, 1, 0, 0, npes, pSync); if (mype == 0) From a21c85502cb022fbf98ffc806811cd49f9a9598f Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 17:53:29 -0500 Subject: [PATCH 314/319] interop/rma: adjust text based on f2f meeting feedback --- content/interoperability.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/interoperability.tex b/content/interoperability.tex index a516328d1..9ae69062c 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -138,9 +138,9 @@ \subsubsection{Example} \subsection{RMA Programming Models} \label{subsec:interoperability:rma} -Both \openshmem and MPI define similar RMA and atomic operations for remote memory -access, however, a portable program should not assume interoperability between these -two RMA models. +\openshmem and MPI each defines similar one-sided communication models, +however, a portable program should not assume interoperability between these +models. For instance, \openshmem guarantees the atomicity only of concurrent \openshmem AMO operations that operate on symmetric data with the same datatype. 
Access to the same symmetric object with MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may From 09a6b841512b86c5735c210ffeadd8895180c35f Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 17:55:57 -0500 Subject: [PATCH 315/319] interop/query: delete note to implementors --- content/shmem_query_interoperability.tex | 5 ----- 1 file changed, 5 deletions(-) diff --git a/content/shmem_query_interoperability.tex b/content/shmem_query_interoperability.tex index 8af1e26ca..df5d977b6 100644 --- a/content/shmem_query_interoperability.tex +++ b/content/shmem_query_interoperability.tex @@ -32,8 +32,3 @@ otherwise, it is \CONST{0}. } \end{apidefinition} - -\apiimpnotes{ -Implementations that do not support interoperability with other programming models -may simply return \CONST{0} for the relevant interoperability query. -} From 0b801f5e3d0c7f9da0dcf5d6a4743c5b29f074b8 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 23:58:09 -0500 Subject: [PATCH 316/319] interop/progress: adjust note to implementor --- content/interoperability.tex | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/content/interoperability.tex b/content/interoperability.tex index 9ae69062c..a1c55a3a4 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -170,10 +170,11 @@ \subsection{Communication Progress} \apiimpnotes{ Implementations that provide both \openshmem and MPI interfaces should try -to ensure progress for both models when necessary and possible, for performance -reasons. For instance, an implementation may start making progress for -both \openshmem and MPI whenever possible, after the user program has called -\FUNC{shmem\_init} and \FUNC{MPI\_init} provided by the same system. +to ensure progress for both models, when necessary and possible, for performance +reasons. 
For instance, an implementation +may utilize a software progress thread to process any software-handled +communication requests, after the user program has called +\FUNC{shmem\_init} and \FUNC{MPI\_Init} provided by the same system. } From f8eebf676660f7fe098720c4fcbd793fbce6cad0 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 23:59:07 -0500 Subject: [PATCH 317/319] interop/query: shorten overview example --- content/interoperability.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/interoperability.tex b/content/interoperability.tex index a1c55a3a4..0fcc78b0a 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -183,8 +183,8 @@ \section{Query Interoperability} A hybrid user program can query the interoperability feature of an \openshmem implementation in order to avoid unnecessary overhead and programming complexity. For instance, the user program can eliminate manual progress polling for MPI -communication if the underlying software runtime guarantees the progression of -communication also for MPI even without explicit function calls. +communication if the \openshmem implementation guarantees asynchronous +communication also for MPI. 
\subsection{\textbf{SHMEM\_QUERY\_INTEROPERABILITY}} \label{subsec:interoperability:query} From b937c0a1f44a05cdf25437e78ace5d02e4a05040 Mon Sep 17 00:00:00 2001 From: Min Si Date: Mon, 21 Oct 2019 23:59:51 -0500 Subject: [PATCH 318/319] interop/query: add example with MPI progress support --- content/shmem_query_interoperability.tex | 13 +++++++++ example_code/shmem_query_mpi_progress.c | 34 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 example_code/shmem_query_mpi_progress.c diff --git a/content/shmem_query_interoperability.tex b/content/shmem_query_interoperability.tex index df5d977b6..a656f2497 100644 --- a/content/shmem_query_interoperability.tex +++ b/content/shmem_query_interoperability.tex @@ -31,4 +31,17 @@ The return value is \CONST{1} if \VAR{property} is supported by the \openshmem library; otherwise, it is \CONST{0}. } + +\begin{apiexamples} + +\apicexample + {The following example queries whether the \openshmem library supports asynchronous +progress for MPI. 
If it returns 1, the library guarantees the MPI nonblocking send +is processed while PE 0 is in the busy wait loop with repeated calls to +\FUNC{shmem\_int\_atomic\_fetch} so that deadlock will not occur.} + {./example_code/shmem_query_mpi_progress.c} + {} + +\end{apiexamples} + \end{apidefinition} diff --git a/example_code/shmem_query_mpi_progress.c b/example_code/shmem_query_mpi_progress.c new file mode 100644 index 000000000..063c320f7 --- /dev/null +++ b/example_code/shmem_query_mpi_progress.c @@ -0,0 +1,34 @@ +#include <stdlib.h> +#include <mpi.h> +#include <shmem.h> + +int main(int argc, char *argv[]) +{ + MPI_Init(&argc, &argv); + shmem_init(); + + int mype = shmem_my_pe(); + + if (!shmem_query_interoperability(SHMEM_PROGRESS_MPI)) + shmem_global_exit(EXIT_FAILURE); + + int a[100]; + static int b = 0; + if (mype == 0) { + MPI_Request req = MPI_REQUEST_NULL; + MPI_Isend(a, 100, MPI_INT, 1, 0, MPI_COMM_WORLD, &req); + + while (shmem_int_atomic_fetch(&b, 0) != 1); + + MPI_Wait(&req, MPI_STATUS_IGNORE); + } else { + MPI_Recv(a, 100, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + shmem_int_atomic_set(&b, 1, 0); + } + + shmem_finalize(); + MPI_Finalize(); + + return 0; +} From c4da312bc3678df0e9452e8793e31a9d7f171b3a Mon Sep 17 00:00:00 2001 From: Gail Pieper Date: Tue, 22 Oct 2019 18:30:14 +0000 Subject: [PATCH 319/319] interop: made a pass by English editor --- content/interoperability.tex | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/content/interoperability.tex b/content/interoperability.tex index 0fcc78b0a..1ce88945d 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -1,4 +1,4 @@ -\chapter{Interoperability with other Programming Models}\label{sec:interoperability} +\chapter{Interoperability with Other Programming Models}\label{sec:interoperability} OpenSHMEM routines may be used in conjunction with the routines of other communication libraries or parallel languages in the same program.
This section @@ -19,8 +19,8 @@ \section{MPI Interoperability} instance, one may implement both \openshmem and MPI as standalone libraries, each of which allocates and initializes fully isolated communication resources. -As the other common approach, however, -a vendor may implement both \openshmem and MPI interfaces within the +Another common approach +is to implement both \openshmem and MPI interfaces within the same software system in order to share a communication resource when possible. To improve interoperability and portability in \openshmem + MPI hybrid @@ -28,7 +28,7 @@ \section{MPI Interoperability} \subsection{Initialization} -To ensure that a hybrid program can be portably performed with different vendor +In order to ensure that a hybrid program can be portably performed with different vendor implementations, the \openshmem environment of the program must be initialized by a call to \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} and be finalized by a call to \FUNC{shmem\_finalize}; the MPI environment of the program must be initialized @@ -61,7 +61,7 @@ \subsection{Dynamic Process Creation} the \openshmem library before any other \openshmem routine may be called. Communicating with a dynamically created process in the \openshmem environment may result in undefined behavior. -Hence, users should not use \openshmem and MPI dynamic process model +Hence, users should not use \openshmem and MPI dynamic process models in the same program. @@ -98,11 +98,11 @@ \subsection{Thread Safety} \item The \VAR{THREAD\_FUNNELED} thread level allows only the main thread to make communication calls. A hybrid program using the \VAR{THREAD\_FUNNELED} - thread level in both \openshmem and MPI should ensure the same main thread + thread level in both \openshmem and MPI should ensure that the same main thread is used in both communication environments. 
\item The \VAR{THREAD\_SERIALIZED} thread level requires the program to ensure - communication calls are not made concurrently by multiple threads. If a + that communication calls are not made concurrently by multiple threads. If a hybrid program uses \VAR{THREAD\_SERIALIZED} in one communication environment and \VAR{THREAD\_SERIALIZED} or \VAR{THREAD\_FUNNELED} in the other one, it should also guarantee that the \openshmem and MPI calls are not made concurrently @@ -113,8 +113,8 @@ \subsection{Mapping Process Identification Numbers} \label{subsec:interoperability:id} Similar to the PE identifier in \openshmem, MPI defines rank as the -identification number of a process in a communicator. Both \openshmem PE -and MPI rank are unique integers assigned from zero to one less than the total +identification number of a process in a communicator. Both the \openshmem PE +and the MPI rank are unique integers assigned from zero to one less than the total number of processes. In a hybrid program, the \openshmem PE and the MPI rank in \VAR{MPI\_COMM\_WORLD} of a process can be equal. This feature, however, may be provided by only some of the \openshmem and MPI @@ -125,7 +125,7 @@ \subsection{Mapping Process Identification Numbers} in each communication environment and manage the mapping of identifiers in the program when necessary. -\subsubsection{Example} +\subsubsection*{Example} \label{subsubsec:interoperability:id:example} The following example demonstrates how to manage the mapping between \openshmem PE identifier and MPI ranks in \VAR{MPI\_COMM\_WORLD} in a hybrid \openshmem @@ -138,14 +138,14 @@ \subsubsection{Example} \subsection{RMA Programming Models} \label{subsec:interoperability:rma} -\openshmem and MPI each defines similar one-sided communication models, +\openshmem and MPI each define similar one-sided communication models; however, a portable program should not assume interoperability between these models. 
For instance, \openshmem guarantees the atomicity only of concurrent \openshmem AMO operations that operate on symmetric data with the same datatype. Access to the same symmetric object with MPI atomic operations, such as an \FUNC{MPI\_Fetch\_and\_op}, may result in an undefined result. Furthermore, -because most RMA programs can be written using either \openshmem or MPI RMA, +because most RMA programs can be written by using either \openshmem or MPI RMA, users should choose only one of the RMA models in the same program, whenever possible, for performance and code simplicity.