From 23874333484b2f8d24fa1d36c372c745f1f71dc5 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 14 Oct 2021 16:07:35 -0400 Subject: [PATCH 01/30] Add 1st draft of bundle start/stop API & example --- content/bundles_intro.tex | 23 +++++++++++++++++ content/shmem_bundle_start.tex | 35 +++++++++++++++++++++++++ content/shmem_bundle_stop.tex | 40 +++++++++++++++++++++++++++++ example_code/shmem_bundle_example.c | 29 +++++++++++++++++++++ main_spec.tex | 15 +++++++++++ 5 files changed, 142 insertions(+) create mode 100644 content/bundles_intro.tex create mode 100644 content/shmem_bundle_start.tex create mode 100644 content/shmem_bundle_stop.tex create mode 100644 example_code/shmem_bundle_example.c diff --git a/content/bundles_intro.tex b/content/bundles_intro.tex new file mode 100644 index 00000000..b36d5572 --- /dev/null +++ b/content/bundles_intro.tex @@ -0,0 +1,23 @@ +\newtext{The performance of \openshmem programs that issue many consecutive and +small-sized communication routines might be improved by combining these +operations into fewer messages. +The \emph{bundling routines} provide a convenient interface for indicating to +the \openshmem library that a series of operations on a communication context +are eligible for bundling-related optimization. +The \FUNC{shmem\_bundle\_start} routine indicates the beginning of a bundling +phase, and the \FUNC{shmem\_bundle\_stop} routine indicates the end of a +bundling phase.} + +\newtext{The bundling routines are \textit{hints} to the \openshmem library, +and they do not affect the completion or ordering semantics of any \openshmem +routines in the program. +For this reason, routines such as non-blocking RMAs, non-blocking AMOs, +non-blocking \OPR{put-with-signal}, blocking scalar \OPR{puts}, and blocking +non-fetching AMOs are viable candidates for bundling optimizations. 
+Other routines, such as blocking non-scalar \OPR{puts} and \OPR{gets}, blocking +fetching AMOs, blocking scalar \OPR{gets}, and the memory ordering routines +might require the library to enforce remote completion, reducing the potential +benefit of bundling optimizations. +Because bundling is performed with respect to an \openshmem communication +context, routines not performed on a communication context are ineligible for +bundling optimization.} diff --git a/content/shmem_bundle_start.tex b/content/shmem_bundle_start.tex new file mode 100644 index 00000000..8bbd0ed6 --- /dev/null +++ b/content/shmem_bundle_start.tex @@ -0,0 +1,35 @@ +\apisummary{ + Start a communication bundle. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_bundle\_start}@(void); +void @\FuncDecl{shmem\_ctx\_bundle\_start}@(shmem_ctx_t ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the optimization. When this argument is not provided, the + optimization is performed on the default context.} +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_bundle\_start} routine provides a hint to the \openshmem + library to being applying bundling-related optimizations. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + The \FUNC{shmem\_bundle\_start} routine is a hint for improving + performance, and \openshmem implementations are not required to apply any + optimization. +} + +\end{apidefinition} + + diff --git a/content/shmem_bundle_stop.tex b/content/shmem_bundle_stop.tex new file mode 100644 index 00000000..6b981268 --- /dev/null +++ b/content/shmem_bundle_stop.tex @@ -0,0 +1,40 @@ +\apisummary{ + Stop a communication bundle. 
+} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_bundle\_stop}@(void); +void @\FuncDecl{shmem\_ctx\_bundle\_stop}@(shmem_ctx_t ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the optimization. When this argument is not provided, the + optimization is performed on the default context.} +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_bundle\_stop} routine provides a hint to the \openshmem + library to stop applying bundling-related optimizations. +} + +\apireturnvalues{ + None. +} + +\begin{apiexamples} + +\apicexample + {The following example demonstrates the usage of + \FUNC{shmem\_bundle\_start} and \FUNC{shmem\_bundle\_stop} with a loop of + random atomic non-fetching XOR updates to a distributed table, similar to + the Giga-updates per second (GUPS) microbenchmark + \footnote{http://icl.cs.utk.edu/projectsfiles/hpcc/RandomAccess/}.} + {./example_code/shmem_bundle_example.c} + {} +\end{apiexamples} + +\end{apidefinition} + diff --git a/example_code/shmem_bundle_example.c b/example_code/shmem_bundle_example.c new file mode 100644 index 00000000..70f39d5f --- /dev/null +++ b/example_code/shmem_bundle_example.c @@ -0,0 +1,29 @@ +#include +#include + +#define N_UPDATES (1 << 20) +#define N_INDICES (1 << 10) +#define N_VALUES (1 << 31) + +int main(void) { + + shmem_init(); + + uint64_t *table = shmem_malloc(N_INDICES * sizeof(uint64_t)); + + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + + shmem_bundle_start(); + for (size_t i = 0; i < N_UPDATES; i++) { + int random_pe = rand() % npes; + size_t random_idx = rand() % N_INDICES; + uint64_t random_val = rand() % N_VALUES; + shmem_uint64_atomic_xor(&table[random_idx], random_val, random_pe); + } + shmem_bundle_stop(); + + shmem_free(table); + shmem_finalize(); + return 0; +} diff --git a/main_spec.tex b/main_spec.tex index 19b7200f..982f814b 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ 
-352,6 +352,21 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} \input{content/shmem_signal_fetch.tex} +\newtext{ +\subsection{Bundling Routines}\label{subsec:bundle} +\input{content/bundles_intro.tex} +} + +\newtext{ +\subsubsection{\textbf{SHMEM\_BUNDLE\_START}}\label{subsec:shmem_bundle_start} +\input{content/shmem_bundle_start.tex} +} + +\newtext{ +\subsubsection{\textbf{SHMEM\_BUNDLE\_STOP}}\label{subsec:shmem_bundle_stop} +\input{content/shmem_bundle_stop.tex} +} + \subsection{Collective Routines}\label{subsec:coll} \input{content/collective_intro.tex} From 352f20f45898ed6aae6a9650f22d09bb0bf2b30b Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 15 Oct 2021 10:49:15 -0400 Subject: [PATCH 02/30] Added change-log entry for the bundle routines --- content/backmatter.tex | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 755d4bc0..0a58a124 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -668,6 +668,11 @@ \section{Version 1.6} \openshmem[1.5] Table 10, and clarified the types, names, and supporting operations for team-based reductions. \ChangelogRef{teamreducetypes}% +% +\item Added the bundling routines, \FUNC{shmem\_bundle\_start} and + \FUNC{shmem\_bundle\_stop}, which hint to the \openshmem library to apply + bundling-related optimizations. 
+\ChangelogRef{subsec:bundle}% \end{itemize} \section{Version 1.5} From ca0303782bf05cc6cbe9b6b77422d162fb1dfc3c Mon Sep 17 00:00:00 2001 From: David Ozog Date: Mon, 13 Dec 2021 10:18:34 -0500 Subject: [PATCH 03/30] Branch from wip/bundles w/ feedback (highlighted) --- content/bundles_intro.tex | 26 +++++++++++++++----------- content/shmem_bundle_start.tex | 21 ++++++++++++++++++--- content/shmem_bundle_stop.tex | 25 ++++++++++++++++++++++--- main_spec.tex | 6 ------ utils/defs.tex | 1 + 5 files changed, 56 insertions(+), 23 deletions(-) diff --git a/content/bundles_intro.tex b/content/bundles_intro.tex index b36d5572..eefaea63 100644 --- a/content/bundles_intro.tex +++ b/content/bundles_intro.tex @@ -1,15 +1,19 @@ -\newtext{The performance of \openshmem programs that issue many consecutive and -small-sized communication routines might be improved by combining these -operations into fewer messages. -The \emph{bundling routines} provide a convenient interface for indicating to -the \openshmem library that a series of operations on a communication context -are eligible for bundling-related optimization. +The performance of \openshmem programs that issue many +consecutive and small-sized communication routines might be +improved by combining these \oldtext{operations} +\newtext{routines} into fewer \newtext{operations} +\oldtext{messages}. +The \emph{bundling routines} provide a \newtext{mechanism for an application to +indicate} \oldtext{convenient interface for indicating} to the \openshmem +library that \newtext{it intends to issue} a series of operations on a +communication context \newtext{that} are \newtext{suitable} \oldtext{eligible} +for bundling\oldtext{-related} optimization. The \FUNC{shmem\_bundle\_start} routine indicates the beginning of a bundling phase, and the \FUNC{shmem\_bundle\_stop} routine indicates the end of a -bundling phase.} +bundling phase. 
-\newtext{The bundling routines are \textit{hints} to the \openshmem library, -and they do not affect the completion or ordering semantics of any \openshmem +The bundling routines are \textit{hints} to the \openshmem library\newtext{;}\oldtext{,} +\oldtext{and} they do not affect the completion or ordering semantics of any \openshmem routines in the program. For this reason, routines such as non-blocking RMAs, non-blocking AMOs, non-blocking \OPR{put-with-signal}, blocking scalar \OPR{puts}, and blocking @@ -19,5 +23,5 @@ might require the library to enforce remote completion, reducing the potential benefit of bundling optimizations. Because bundling is performed with respect to an \openshmem communication -context, routines not performed on a communication context are ineligible for -bundling optimization.} +context, routines not performed on a communication context \newtext{(like collective routines)} are ineligible for +bundling optimization. diff --git a/content/shmem_bundle_start.tex b/content/shmem_bundle_start.tex index 8bbd0ed6..57b659cb 100644 --- a/content/shmem_bundle_start.tex +++ b/content/shmem_bundle_start.tex @@ -5,19 +5,34 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_bundle\_start}@(void); -void @\FuncDecl{shmem\_ctx\_bundle\_start}@(shmem_ctx_t ctx); +void @\FuncDecl{shmem\_bundle\_start}@(long options); +void @\FuncDecl{shmem\_ctx\_bundle\_start}@(shmem_ctx_t ctx, long options); \end{Csynopsis} \begin{apiarguments} \apiargument{IN}{ctx}{A context handle specifying the context on which to perform the optimization. When this argument is not provided, the optimization is performed on the default context.} + \newtext{ + \apiargument{IN}{options}{The set of options requested for the + given bundle. 
Multiple options may be requested by combining + them with a bitwise OR operation; otherwise, \CONST{0} can be + given if no options are requested.} + } \end{apiarguments} \apidescription{ The \FUNC{shmem\_bundle\_start} routine provides a hint to the \openshmem - library to being applying bundling-related optimizations. + library to \newtext{begin a bundling phase} \oldtext{being applying + bundling-related optimizations}. + \newtext{ + If a bundling phase is already started on a given + communication context, another call to + \FUNC{shmem\_bundle\_start} on that context has no effect. + A bundling phase on a communication context must be + stopped with a call to \FUNC{shmem\_bundle\_stop} on the + same context. + } } \apireturnvalues{ diff --git a/content/shmem_bundle_stop.tex b/content/shmem_bundle_stop.tex index 6b981268..ad1dd410 100644 --- a/content/shmem_bundle_stop.tex +++ b/content/shmem_bundle_stop.tex @@ -5,25 +5,44 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_bundle\_stop}@(void); -void @\FuncDecl{shmem\_ctx\_bundle\_stop}@(shmem_ctx_t ctx); +void @\FuncDecl{shmem\_bundle\_stop}@(long options); +void @\FuncDecl{shmem\_ctx\_bundle\_stop}@(shmem_ctx_t ctx, long options); \end{Csynopsis} \begin{apiarguments} \apiargument{IN}{ctx}{A context handle specifying the context on which to perform the optimization. When this argument is not provided, the optimization is performed on the default context.} + \newtext{ + \apiargument{IN}{options}{The set of options requested for the + given bundle. Multiple options may be requested by combining + them with a bitwise OR operation; otherwise, \CONST{0} can be + given if no options are requested.} + } \end{apiarguments} \apidescription{ The \FUNC{shmem\_bundle\_stop} routine provides a hint to the \openshmem - library to stop applying bundling-related optimizations. + library to \newtext{end a bundling phase} \oldtext{stop applying bundling-related optimizations}. 
+ \newtext{ + If a bundling phase is already stopped on a given + communication context, another call to + \FUNC{shmem\_bundle\_stop} on that context has no effect. + } } \apireturnvalues{ None. } +\newtext{ +\apinotes{ + We discourage users from including non-\openshmem code, such + as a long computation loop, within a bundling phase without + first calling \FUNC{shmem\_bundle\_stop}. +} +} + \begin{apiexamples} \apicexample diff --git a/main_spec.tex b/main_spec.tex index 982f814b..17a1f7d3 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -352,20 +352,14 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} \input{content/shmem_signal_fetch.tex} -\newtext{ \subsection{Bundling Routines}\label{subsec:bundle} \input{content/bundles_intro.tex} -} -\newtext{ \subsubsection{\textbf{SHMEM\_BUNDLE\_START}}\label{subsec:shmem_bundle_start} \input{content/shmem_bundle_start.tex} -} -\newtext{ \subsubsection{\textbf{SHMEM\_BUNDLE\_STOP}}\label{subsec:shmem_bundle_stop} \input{content/shmem_bundle_stop.tex} -} \subsection{Collective Routines}\label{subsec:coll} diff --git a/utils/defs.tex b/utils/defs.tex index 4496b6fe..7e89f1af 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -32,6 +32,7 @@ \def\colorswapot{\colorlet{saved}{.}\color{red}} \def\prevcolor{\color{saved}} +\newcommand{\newertext}[1]{\textcolor{blue}{#1}} \newcommand{\newtext}[1]{\textcolor{ForestGreen}{#1}} \newcommand{\oldtext}[1]{\textcolor{magenta}{\sout{#1}}} \newcommand{\insertDocVersion}{1.5} From 0b532e042e4f817668fafa88b65e68e06d635533 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 31 Mar 2022 12:50:37 -0400 Subject: [PATCH 04/30] WIP text / rearrangment for Sessions --- .../{bundles_intro.tex => sessions_intro.tex} | 19 +++++++++---------- main_spec.tex | 18 ++++++++---------- 2 files changed, 17 insertions(+), 20 deletions(-) rename content/{bundles_intro.tex => sessions_intro.tex} (64%) diff --git a/content/bundles_intro.tex b/content/sessions_intro.tex 
similarity index 64% rename from content/bundles_intro.tex rename to content/sessions_intro.tex index eefaea63..508888e1 100644 --- a/content/bundles_intro.tex +++ b/content/sessions_intro.tex @@ -1,13 +1,12 @@ -The performance of \openshmem programs that issue many -consecutive and small-sized communication routines might be -improved by combining these \oldtext{operations} -\newtext{routines} into fewer \newtext{operations} -\oldtext{messages}. -The \emph{bundling routines} provide a \newtext{mechanism for an application to -indicate} \oldtext{convenient interface for indicating} to the \openshmem -library that \newtext{it intends to issue} a series of operations on a -communication context \newtext{that} are \newtext{suitable} \oldtext{eligible} -for bundling\oldtext{-related} optimization. +\openSHMEM \emph{Sessions} provide a mechanism for applications to apply property to +the \openshmem library that a series of operations on a communication context. + + +The \emph{sessions routines} provide a mechanism for an application to +indicate to the \openshmem +library that it intends to issue a series of operations on a +communication context optimization. + The \FUNC{shmem\_bundle\_start} routine indicates the beginning of a bundling phase, and the \FUNC{shmem\_bundle\_stop} routine indicates the end of a bundling phase. 
diff --git a/main_spec.tex b/main_spec.tex index 17a1f7d3..19e8a7c5 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -144,6 +144,9 @@ \subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} \subsection{Communication Management Routines}\label{sec:ctx} \input{content/context_intro.tex} +\subsubsection{Sessions for Communication Contexts}\label{subsec:sessions} +\input{content/sessions_intro.tex} + \subsubsection{\textbf{SHMEM\_CTX\_CREATE}} \label{subsec:shmem_ctx_create} \input{content/shmem_ctx_create.tex} @@ -160,6 +163,11 @@ \subsubsection{\textbf{SHMEM\_CTX\_GET\_TEAM}} \label{subsec:shmem_ctx_get_team} \input{content/shmem_ctx_get_team.tex} +\subsubsection{\textbf{SHMEM\_BUNDLE\_START}}\label{subsec:shmem_bundle_start} +\input{content/shmem_bundle_start.tex} + +\subsubsection{\textbf{SHMEM\_BUNDLE\_STOP}}\label{subsec:shmem_bundle_stop} +\input{content/shmem_bundle_stop.tex} \subsection{Remote Memory Access Routines}\label{sec:rma} \input{content/rma_intro.tex} @@ -352,16 +360,6 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} \input{content/shmem_signal_fetch.tex} -\subsection{Bundling Routines}\label{subsec:bundle} -\input{content/bundles_intro.tex} - -\subsubsection{\textbf{SHMEM\_BUNDLE\_START}}\label{subsec:shmem_bundle_start} -\input{content/shmem_bundle_start.tex} - -\subsubsection{\textbf{SHMEM\_BUNDLE\_STOP}}\label{subsec:shmem_bundle_stop} -\input{content/shmem_bundle_stop.tex} - - \subsection{Collective Routines}\label{subsec:coll} \input{content/collective_intro.tex} From 9d7b266f778ea4c9a8ffad4299656d59de157c73 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 23 Sep 2022 00:08:05 -0600 Subject: [PATCH 05/30] Sessions: rewrite bundles text to suit sessions --- content/sessions_intro.tex | 37 ++++---- content/shmem_bundle_start.tex | 50 ---------- content/shmem_bundle_stop.tex | 59 ------------ content/shmem_session_start.tex | 94 +++++++++++++++++++ content/shmem_session_stop.tex | 46 
+++++++++ ...ndle_example.c => shmem_session_example.c} | 16 ++-- main_spec.tex | 20 ++-- 7 files changed, 179 insertions(+), 143 deletions(-) delete mode 100644 content/shmem_bundle_start.tex delete mode 100644 content/shmem_bundle_stop.tex create mode 100644 content/shmem_session_start.tex create mode 100644 content/shmem_session_stop.tex rename example_code/{shmem_bundle_example.c => shmem_session_example.c} (59%) diff --git a/content/sessions_intro.tex b/content/sessions_intro.tex index 508888e1..c769e1d9 100644 --- a/content/sessions_intro.tex +++ b/content/sessions_intro.tex @@ -1,26 +1,25 @@ -\openSHMEM \emph{Sessions} provide a mechanism for applications to apply property to -the \openshmem library that a series of operations on a communication context. +\openshmem \emph{sessions} provide a mechanism for applications to inform the +\openshmem library of an upcoming sequence of communication routines that +exhibit suitable patterns for runtime optimizations. +A session is associated with a specific \openshmem communication context +(Section~\ref{sec:ctx}), and it indicates the beginning and ending of +communication phases on that context. +The \FUNC{shmem\_session\_start} routine indicates the beginning of a session, +and the \FUNC{shmem\_session\_stop} routine indicates the end of a session. +The \LibConstRef{SHMEM\_SESSION\_*} options (Table~\ref{session_opts}) indicate +which patterns of \openshmem RMA and AMO routines will occur within a session. +These options serve only as \textit{hints} to the library; it is up to the +implementation whether or not to apply any optimizations within a session. - -The \emph{sessions routines} provide a mechanism for an application to -indicate to the \openshmem -library that it intends to issue a series of operations on a -communication context optimization. 
- -The \FUNC{shmem\_bundle\_start} routine indicates the beginning of a bundling -phase, and the \FUNC{shmem\_bundle\_stop} routine indicates the end of a -bundling phase. - -The bundling routines are \textit{hints} to the \openshmem library\newtext{;}\oldtext{,} -\oldtext{and} they do not affect the completion or ordering semantics of any \openshmem +Sessions do not affect the completion or ordering semantics of any \openshmem routines in the program. For this reason, routines such as non-blocking RMAs, non-blocking AMOs, non-blocking \OPR{put-with-signal}, blocking scalar \OPR{puts}, and blocking -non-fetching AMOs are viable candidates for bundling optimizations. +non-fetching AMOs are viable candidates for optimizations. Other routines, such as blocking non-scalar \OPR{puts} and \OPR{gets}, blocking fetching AMOs, blocking scalar \OPR{gets}, and the memory ordering routines might require the library to enforce remote completion, reducing the potential -benefit of bundling optimizations. -Because bundling is performed with respect to an \openshmem communication -context, routines not performed on a communication context \newtext{(like collective routines)} are ineligible for -bundling optimization. +benefit of session optimizations. +Because sessions are associated with an \openshmem communication context, +routines not performed on a communication context (like collective routines) +are ineligible for session hints. diff --git a/content/shmem_bundle_start.tex b/content/shmem_bundle_start.tex deleted file mode 100644 index 57b659cb..00000000 --- a/content/shmem_bundle_start.tex +++ /dev/null @@ -1,50 +0,0 @@ -\apisummary{ - Start a communication bundle. 
-} - -\begin{apidefinition} - -\begin{Csynopsis} -void @\FuncDecl{shmem\_bundle\_start}@(long options); -void @\FuncDecl{shmem\_ctx\_bundle\_start}@(shmem_ctx_t ctx, long options); -\end{Csynopsis} - -\begin{apiarguments} - \apiargument{IN}{ctx}{A context handle specifying the context on which to - perform the optimization. When this argument is not provided, the - optimization is performed on the default context.} - \newtext{ - \apiargument{IN}{options}{The set of options requested for the - given bundle. Multiple options may be requested by combining - them with a bitwise OR operation; otherwise, \CONST{0} can be - given if no options are requested.} - } -\end{apiarguments} - -\apidescription{ - The \FUNC{shmem\_bundle\_start} routine provides a hint to the \openshmem - library to \newtext{begin a bundling phase} \oldtext{being applying - bundling-related optimizations}. - \newtext{ - If a bundling phase is already started on a given - communication context, another call to - \FUNC{shmem\_bundle\_start} on that context has no effect. - A bundling phase on a communication context must be - stopped with a call to \FUNC{shmem\_bundle\_stop} on the - same context. - } -} - -\apireturnvalues{ - None. -} - -\apinotes{ - The \FUNC{shmem\_bundle\_start} routine is a hint for improving - performance, and \openshmem implementations are not required to apply any - optimization. -} - -\end{apidefinition} - - diff --git a/content/shmem_bundle_stop.tex b/content/shmem_bundle_stop.tex deleted file mode 100644 index ad1dd410..00000000 --- a/content/shmem_bundle_stop.tex +++ /dev/null @@ -1,59 +0,0 @@ -\apisummary{ - Stop a communication bundle. -} - -\begin{apidefinition} - -\begin{Csynopsis} -void @\FuncDecl{shmem\_bundle\_stop}@(long options); -void @\FuncDecl{shmem\_ctx\_bundle\_stop}@(shmem_ctx_t ctx, long options); -\end{Csynopsis} - -\begin{apiarguments} - \apiargument{IN}{ctx}{A context handle specifying the context on which to - perform the optimization. 
When this argument is not provided, the - optimization is performed on the default context.} - \newtext{ - \apiargument{IN}{options}{The set of options requested for the - given bundle. Multiple options may be requested by combining - them with a bitwise OR operation; otherwise, \CONST{0} can be - given if no options are requested.} - } -\end{apiarguments} - -\apidescription{ - The \FUNC{shmem\_bundle\_stop} routine provides a hint to the \openshmem - library to \newtext{end a bundling phase} \oldtext{stop applying bundling-related optimizations}. - \newtext{ - If a bundling phase is already stopped on a given - communication context, another call to - \FUNC{shmem\_bundle\_stop} on that context has no effect. - } -} - -\apireturnvalues{ - None. -} - -\newtext{ -\apinotes{ - We discourage users from including non-\openshmem code, such - as a long computation loop, within a bundling phase without - first calling \FUNC{shmem\_bundle\_stop}. -} -} - -\begin{apiexamples} - -\apicexample - {The following example demonstrates the usage of - \FUNC{shmem\_bundle\_start} and \FUNC{shmem\_bundle\_stop} with a loop of - random atomic non-fetching XOR updates to a distributed table, similar to - the Giga-updates per second (GUPS) microbenchmark - \footnote{http://icl.cs.utk.edu/projectsfiles/hpcc/RandomAccess/}.} - {./example_code/shmem_bundle_example.c} - {} -\end{apiexamples} - -\end{apidefinition} - diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex new file mode 100644 index 00000000..df0ba7c2 --- /dev/null +++ b/content/shmem_session_start.tex @@ -0,0 +1,94 @@ +\apisummary{ + Start a communication session. 
+} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_session\_start}@(shmem_ctx_t ctx, long options); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{A context handle specifying the context associated + with this session.} + \apiargument{IN}{options}{The set of requested options from + Table~\ref{session_opts} for this session. Multiple options may be + requested by combining them with a bitwise OR operation; otherwise, + \CONST{0} can be given if no options are requested.} +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_session\_start} routine begins a session on communication + context \VAR{ctx} with hints requested via \VAR{options}. + If a session is already started on a given context, another call to + \FUNC{shmem\_session\_start} on that context combines any new options via a + bitwise OR operation. + Sessions on a communication context must be stopped with a call to + \FUNC{shmem\_session\_stop} on the same context. + Passing false or ambiguous \VAR{options} to a session should not result in + undefined behavior, but may result in the library aborting the program. +} + +\apireturnvalues{ + None. +} + +\begin{longtable}{|p{0.45\textwidth}|p{0.5\textwidth}|} + \hline + \hline + \textbf{Option} & \textbf{Usage hint} + \tabularnewline \hline + \endhead + %% + \LibConstDecl{SHMEM\_SESSION\_OP\_PUT} & + \newline + The session will contain non-blocking \textit{put} and/or scalar put operations. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_OP\_GET} & + \newline + The session will contain non-blocking \textit{get} operations. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_OP\_PUT\_SIGNAL} & + \newline + The session will contain non-blocking \textit{put-with-signal} operations. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_OP\_AMO} & + \newline + The session will contain non-fetching AMOs. 
+ \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_OP\_AMO\_FETCH} & + \newline + The session will contain non-blocking fetching AMOs. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_CHAIN} & + \newline + The session will contain a chain (a trivial repeating pattern) of similar RMA operations. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_UNIFORM\_AMO} & + \newline + The session will contain a chain of AMOs that will not occur concurrently + across any different signal operators (Section~\ref{subsec:signal_operator}), + operations (Section~\ref{sec:amo}), or types (Tables \ref{stdamotypes} and + \ref{extamotypes}). + \tabularnewline \hline + \TableCaptionRef{Session options} + \label{session_opts} +\end{longtable} + +\apinotes{ + The \FUNC{shmem\_session\_start} routine is a hint for improving + performance, and \openshmem implementations are not required to apply any + optimization. + + Implementations are encouraged to supply users with information about the + session options being applied or ignored; for instance, when + \LibConstRef{SHMEM\_DEBUG} is set. +} + +\end{apidefinition} diff --git a/content/shmem_session_stop.tex b/content/shmem_session_stop.tex new file mode 100644 index 00000000..d10b007c --- /dev/null +++ b/content/shmem_session_stop.tex @@ -0,0 +1,46 @@ +\apisummary{ + Stop a communication session. 
+} + +\apinotes{ + Users are discouraged from including non-\openshmem code, such as a long + computation loop, within a session without first calling + \FUNC{shmem\_session\_stop}. +} + + +\begin{apiexamples} + +\apicexample + {The following example demonstrates the usage of + \FUNC{shmem\_session\_start} and \FUNC{shmem\_session\_stop} with a loop of + random atomic non-fetching XOR updates to a distributed table, similar to + the Giga-updates per second (GUPS) microbenchmark + \footnote{http://icl.cs.utk.edu/projectsfiles/hpcc/RandomAccess/}.} + {./example_code/shmem_session_example.c} + {} +\end{apiexamples} + +\end{apidefinition} + diff --git a/example_code/shmem_bundle_example.c b/example_code/shmem_session_example.c similarity index 59% rename from example_code/shmem_bundle_example.c rename to example_code/shmem_session_example.c index 70f39d5f..da5e1494 100644 --- a/example_code/shmem_bundle_example.c +++ b/example_code/shmem_session_example.c @@ -1,27 +1,31 @@ #include +#include #include -#define N_UPDATES (1 << 20) -#define N_INDICES (1 << 10) -#define N_VALUES (1 << 31) +#define N_UPDATES (1lu << 22) +#define N_INDICES (1lu << 10) +#define N_VALUES (1lu << 31) int main(void) { shmem_init(); - uint64_t *table = shmem_malloc(N_INDICES * sizeof(uint64_t)); + uint64_t *table = shmem_calloc(N_INDICES, sizeof(uint64_t)); int mype = shmem_my_pe(); int npes = shmem_n_pes(); + srand(mype); + + shmem_session_start(SHMEM_CTX_DEFAULT, SHMEM_SESSION_UNIFORM_AMO); - shmem_bundle_start(); for (size_t i = 0; i < N_UPDATES; i++) { int random_pe = rand() % npes; size_t random_idx = rand() % N_INDICES; uint64_t random_val = rand() % N_VALUES; shmem_uint64_atomic_xor(&table[random_idx], random_val, random_pe); } - shmem_bundle_stop(); + + shmem_session_stop(SHMEM_CTX_DEFAULT); shmem_free(table); shmem_finalize(); diff --git a/main_spec.tex b/main_spec.tex index 980898ce..73a570be 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -1,5 +1,6 @@ 
\documentclass[10pt,oneside]{book} + \input{utils/packages} \input{utils/defs} @@ -150,9 +151,6 @@ \subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} \subsection{Communication Management Routines}\label{sec:ctx} \input{content/context_intro.tex} -\subsubsection{Sessions for Communication Contexts}\label{subsec:sessions} -\input{content/sessions_intro.tex} - \subsubsection{\textbf{SHMEM\_CTX\_CREATE}} \label{subsec:shmem_ctx_create} \input{content/shmem_ctx_create.tex} @@ -169,12 +167,6 @@ \subsubsection{\textbf{SHMEM\_CTX\_GET\_TEAM}} \label{subsec:shmem_ctx_get_team} \input{content/shmem_ctx_get_team.tex} -\subsubsection{\textbf{SHMEM\_BUNDLE\_START}}\label{subsec:shmem_bundle_start} -\input{content/shmem_bundle_start.tex} - -\subsubsection{\textbf{SHMEM\_BUNDLE\_STOP}}\label{subsec:shmem_bundle_stop} -\input{content/shmem_bundle_stop.tex} - \subsection{Remote Memory Access Routines}\label{sec:rma} \input{content/rma_intro.tex} @@ -366,6 +358,16 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} \input{content/shmem_signal_fetch.tex} +\subsection{Session Routines}\label{subsec:sessions} +\input{content/sessions_intro.tex} + +\subsubsection{\textbf{SHMEM\_SESSION\_START}}\label{subsec:shmem_session_start} +\input{content/shmem_session_start.tex} + +\subsubsection{\textbf{SHMEM\_SESSION\_STOP}}\label{subsec:shmem_session_stop} +\input{content/shmem_session_stop.tex} + + \subsection{Collective Routines}\label{subsec:coll} \input{content/collective_intro.tex} From ef0d9c69991bae230a18a56eeb7d769bb5354ce0 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 23 Sep 2022 00:08:05 -0600 Subject: [PATCH 06/30] Sessions: rewrite bundles text to suit sessions --- content/sessions_intro.tex | 37 ++++--- content/shmem_bundle_start.tex | 50 ---------- content/shmem_bundle_stop.tex | 59 ------------ content/shmem_session_start.tex | 96 +++++++++++++++++++ content/shmem_session_stop.tex | 46 +++++++++ ...ndle_example.c => 
shmem_session_example.c} | 16 ++-- main_spec.tex | 20 ++-- 7 files changed, 181 insertions(+), 143 deletions(-) delete mode 100644 content/shmem_bundle_start.tex delete mode 100644 content/shmem_bundle_stop.tex create mode 100644 content/shmem_session_start.tex create mode 100644 content/shmem_session_stop.tex rename example_code/{shmem_bundle_example.c => shmem_session_example.c} (59%) diff --git a/content/sessions_intro.tex b/content/sessions_intro.tex index 508888e1..c769e1d9 100644 --- a/content/sessions_intro.tex +++ b/content/sessions_intro.tex @@ -1,26 +1,25 @@ -\openSHMEM \emph{Sessions} provide a mechanism for applications to apply property to -the \openshmem library that a series of operations on a communication context. +\openshmem \emph{sessions} provide a mechanism for applications to inform the +\openshmem library of an upcoming sequence of communication routines that +exhibit suitable patterns for runtime optimizations. +A session is associated with a specific \openshmem communication context +(Section~\ref{sec:ctx}), and it indicates the beginning and ending of +communication phases on that context. +The \FUNC{shmem\_session\_start} routine indicates the beginning of a session, +and the \FUNC{shmem\_session\_stop} routine indicates the end of a session. +The \LibConstRef{SHMEM\_SESSION\_*} options (Table~\ref{session_opts}) indicate +which patterns of \openshmem RMA and AMO routines will occur within a session. +These options serve only as \textit{hints} to the library; it is up to the +implementation whether or not to apply any optimizations within a session. - -The \emph{sessions routines} provide a mechanism for an application to -indicate to the \openshmem -library that it intends to issue a series of operations on a -communication context optimization. - -The \FUNC{shmem\_bundle\_start} routine indicates the beginning of a bundling -phase, and the \FUNC{shmem\_bundle\_stop} routine indicates the end of a -bundling phase. 
- -The bundling routines are \textit{hints} to the \openshmem library\newtext{;}\oldtext{,} -\oldtext{and} they do not affect the completion or ordering semantics of any \openshmem +Sessions do not affect the completion or ordering semantics of any \openshmem routines in the program. For this reason, routines such as non-blocking RMAs, non-blocking AMOs, non-blocking \OPR{put-with-signal}, blocking scalar \OPR{puts}, and blocking -non-fetching AMOs are viable candidates for bundling optimizations. +non-fetching AMOs are viable candidates for optimizations. Other routines, such as blocking non-scalar \OPR{puts} and \OPR{gets}, blocking fetching AMOs, blocking scalar \OPR{gets}, and the memory ordering routines might require the library to enforce remote completion, reducing the potential -benefit of bundling optimizations. -Because bundling is performed with respect to an \openshmem communication -context, routines not performed on a communication context \newtext{(like collective routines)} are ineligible for -bundling optimization. +benefit of session optimizations. +Because sessions are associated with an \openshmem communication context, +routines not performed on a communication context (like collective routines) +are ineligible for session hints. diff --git a/content/shmem_bundle_start.tex b/content/shmem_bundle_start.tex deleted file mode 100644 index 57b659cb..00000000 --- a/content/shmem_bundle_start.tex +++ /dev/null @@ -1,50 +0,0 @@ -\apisummary{ - Start a communication bundle. -} - -\begin{apidefinition} - -\begin{Csynopsis} -void @\FuncDecl{shmem\_bundle\_start}@(long options); -void @\FuncDecl{shmem\_ctx\_bundle\_start}@(shmem_ctx_t ctx, long options); -\end{Csynopsis} - -\begin{apiarguments} - \apiargument{IN}{ctx}{A context handle specifying the context on which to - perform the optimization. 
When this argument is not provided, the - optimization is performed on the default context.} - \newtext{ - \apiargument{IN}{options}{The set of options requested for the - given bundle. Multiple options may be requested by combining - them with a bitwise OR operation; otherwise, \CONST{0} can be - given if no options are requested.} - } -\end{apiarguments} - -\apidescription{ - The \FUNC{shmem\_bundle\_start} routine provides a hint to the \openshmem - library to \newtext{begin a bundling phase} \oldtext{being applying - bundling-related optimizations}. - \newtext{ - If a bundling phase is already started on a given - communication context, another call to - \FUNC{shmem\_bundle\_start} on that context has no effect. - A bundling phase on a communication context must be - stopped with a call to \FUNC{shmem\_bundle\_stop} on the - same context. - } -} - -\apireturnvalues{ - None. -} - -\apinotes{ - The \FUNC{shmem\_bundle\_start} routine is a hint for improving - performance, and \openshmem implementations are not required to apply any - optimization. -} - -\end{apidefinition} - - diff --git a/content/shmem_bundle_stop.tex b/content/shmem_bundle_stop.tex deleted file mode 100644 index ad1dd410..00000000 --- a/content/shmem_bundle_stop.tex +++ /dev/null @@ -1,59 +0,0 @@ -\apisummary{ - Stop a communication bundle. -} - -\begin{apidefinition} - -\begin{Csynopsis} -void @\FuncDecl{shmem\_bundle\_stop}@(long options); -void @\FuncDecl{shmem\_ctx\_bundle\_stop}@(shmem_ctx_t ctx, long options); -\end{Csynopsis} - -\begin{apiarguments} - \apiargument{IN}{ctx}{A context handle specifying the context on which to - perform the optimization. When this argument is not provided, the - optimization is performed on the default context.} - \newtext{ - \apiargument{IN}{options}{The set of options requested for the - given bundle. 
Multiple options may be requested by combining - them with a bitwise OR operation; otherwise, \CONST{0} can be - given if no options are requested.} - } -\end{apiarguments} - -\apidescription{ - The \FUNC{shmem\_bundle\_stop} routine provides a hint to the \openshmem - library to \newtext{end a bundling phase} \oldtext{stop applying bundling-related optimizations}. - \newtext{ - If a bundling phase is already stopped on a given - communication context, another call to - \FUNC{shmem\_bundle\_stop} on that context has no effect. - } -} - -\apireturnvalues{ - None. -} - -\newtext{ -\apinotes{ - We discourage users from including non-\openshmem code, such - as a long computation loop, within a bundling phase without - first calling \FUNC{shmem\_bundle\_stop}. -} -} - -\begin{apiexamples} - -\apicexample - {The following example demonstrates the usage of - \FUNC{shmem\_bundle\_start} and \FUNC{shmem\_bundle\_stop} with a loop of - random atomic non-fetching XOR updates to a distributed table, similar to - the Giga-updates per second (GUPS) microbenchmark - \footnote{http://icl.cs.utk.edu/projectsfiles/hpcc/RandomAccess/}.} - {./example_code/shmem_bundle_example.c} - {} -\end{apiexamples} - -\end{apidefinition} - diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex new file mode 100644 index 00000000..ae4fe397 --- /dev/null +++ b/content/shmem_session_start.tex @@ -0,0 +1,96 @@ +\apisummary{ + Start a communication session. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_session\_start}@(long options, shmem_ctx_t ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{A context handle specifying the context associated + with this session.} + \apiargument{IN}{options}{The set of requested options from + Table~\ref{session_opts} for this session. 
Multiple options may be + requested by combining them with a bitwise OR operation; otherwise, + \CONST{0} can be given if no options are requested.} +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_session\_start} is a non-collective routine that begins a + session on communication context \VAR{ctx} with hints requested via + \VAR{options}. + Sessions on a communication context must be stopped with a call to + \FUNC{shmem\_session\_stop} on the same context. + If a session is already started on a given context, another call to + \FUNC{shmem\_session\_start} on that same context combines new options via a + bitwise OR operation. + Passing false or ambiguous \VAR{options} to a session should never result in + undefined behavior, but may result in the library aborting the program. +} + +\apireturnvalues{ + None. +} + +\begin{longtable}{|p{0.45\textwidth}|p{0.5\textwidth}|} + \hline + \hline + \textbf{Option} & \textbf{Usage hint} + \tabularnewline \hline + \endhead + %% + \LibConstDecl{SHMEM\_SESSION\_OP\_PUT} & + \newline + The session will contain non-blocking \textit{put} and/or scalar put operations. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_OP\_GET} & + \newline + The session will contain non-blocking \textit{get} operations. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_OP\_PUT\_SIGNAL} & + \newline + The session will contain non-blocking \textit{put-with-signal} operations. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_OP\_AMO} & + \newline + The session will contain non-fetching AMOs. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_OP\_AMO\_FETCH} & + \newline + The session will contain non-blocking fetching AMOs. + \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_CHAIN} & + \newline + The session will contain a chain (a trivial repeating pattern) of similar RMA operations. 
+ \tabularnewline \hline + + \LibConstDecl{SHMEM\_SESSION\_UNIFORM\_AMO} & + \newline + The session will contain a chain of AMOs that will not occur concurrently + across any different signal operators (i.e.~\ref{subsec:signal_operator}), + operations (\ref{sec:amo}), or types (Tables \ref{stdamotypes} and + \ref{extamotypes}). + \tabularnewline \hline + \TableCaptionRef{Session options} + \label{session_opts} +\end{longtable} + +\apinotes{ + The \FUNC{shmem\_session\_start} routine provides hints for improving + performance, and \openshmem implementations are not required to apply any + optimization. + \FUNC{shmem\_session\_start} is non-collective, so there is no implied + synchronization. + Implementations are encouraged to supply users with information about the + session options being applied or ignored; for instance, when + \LibConstRef{SHMEM\_DEBUG} is set. +} + +\end{apidefinition} diff --git a/content/shmem_session_stop.tex b/content/shmem_session_stop.tex new file mode 100644 index 00000000..d10b007c --- /dev/null +++ b/content/shmem_session_stop.tex @@ -0,0 +1,46 @@ +\apisummary{ + Stop a communication session. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_session\_stop}@(shmem_ctx_t ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{A context handle specifying the context associated + with this session.} +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_session\_stop} routine ends a session on context \VAR{ctx}. + If a session is already stopped on a given communication context, another + call to \FUNC{shmem\_session\_stop} on that context has no effect. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + Users are discourage from including non-\openshmem code, such as a long + computation loop, within a session without first calling + \FUNC{shmem\_session\_stop}. 
+} + + +\begin{apiexamples} + +\apicexample + {The following example demonstrates the usage of + \FUNC{shmem\_session\_start} and \FUNC{shmem\_session\_stop} with a loop of + random atomic non-fetching XOR updates to a distributed table, similar to + the Giga-updates per second (GUPS) microbenchmark + \footnote{http://icl.cs.utk.edu/projectsfiles/hpcc/RandomAccess/}.} + {./example_code/shmem_session_example.c} + {} +\end{apiexamples} + +\end{apidefinition} + diff --git a/example_code/shmem_bundle_example.c b/example_code/shmem_session_example.c similarity index 59% rename from example_code/shmem_bundle_example.c rename to example_code/shmem_session_example.c index 70f39d5f..270533f2 100644 --- a/example_code/shmem_bundle_example.c +++ b/example_code/shmem_session_example.c @@ -1,27 +1,31 @@ #include <shmem.h> +#include <stdint.h> #include <stdlib.h> -#define N_UPDATES (1 << 20) -#define N_INDICES (1 << 10) -#define N_VALUES (1 << 31) +#define N_UPDATES (1lu << 18) +#define N_INDICES (1lu << 10) +#define N_VALUES (1lu << 31) int main(void) { shmem_init(); - uint64_t *table = shmem_malloc(N_INDICES * sizeof(uint64_t)); + uint64_t *table = shmem_calloc(N_INDICES, sizeof(uint64_t)); int mype = shmem_my_pe(); int npes = shmem_n_pes(); + srand(mype); + + shmem_session_start(SHMEM_SESSION_UNIFORM_AMO, SHMEM_CTX_DEFAULT); - shmem_bundle_start(); for (size_t i = 0; i < N_UPDATES; i++) { int random_pe = rand() % npes; size_t random_idx = rand() % N_INDICES; uint64_t random_val = rand() % N_VALUES; shmem_uint64_atomic_xor(&table[random_idx], random_val, random_pe); } - shmem_bundle_stop(); + + shmem_session_stop(SHMEM_CTX_DEFAULT); shmem_free(table); shmem_finalize(); diff --git a/main_spec.tex b/main_spec.tex index 980898ce..73a570be 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -1,5 +1,6 @@ \documentclass[10pt,oneside]{book} + \input{utils/packages} \input{utils/defs} @@ -150,9 +151,6 @@ \subsubsection{\textbf{SHMEM\_TEAM\_DESTROY}}\label{subsec:shmem_team_destroy} \subsection{Communication Management 
Routines}\label{sec:ctx} \input{content/context_intro.tex} -\subsubsection{Sessions for Communication Contexts}\label{subsec:sessions} -\input{content/sessions_intro.tex} - \subsubsection{\textbf{SHMEM\_CTX\_CREATE}} \label{subsec:shmem_ctx_create} \input{content/shmem_ctx_create.tex} @@ -169,12 +167,6 @@ \subsubsection{\textbf{SHMEM\_CTX\_GET\_TEAM}} \label{subsec:shmem_ctx_get_team} \input{content/shmem_ctx_get_team.tex} -\subsubsection{\textbf{SHMEM\_BUNDLE\_START}}\label{subsec:shmem_bundle_start} -\input{content/shmem_bundle_start.tex} - -\subsubsection{\textbf{SHMEM\_BUNDLE\_STOP}}\label{subsec:shmem_bundle_stop} -\input{content/shmem_bundle_stop.tex} - \subsection{Remote Memory Access Routines}\label{sec:rma} \input{content/rma_intro.tex} @@ -366,6 +358,16 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} \input{content/shmem_signal_fetch.tex} +\subsection{Session Routines}\label{subsec:sessions} +\input{content/sessions_intro.tex} + +\subsubsection{\textbf{SHMEM\_SESSION\_START}}\label{subsec:shmem_session_start} +\input{content/shmem_session_start.tex} + +\subsubsection{\textbf{SHMEM\_SESSION\_STOP}}\label{subsec:shmem_session_stop} +\input{content/shmem_session_stop.tex} + + \subsection{Collective Routines}\label{subsec:coll} \input{content/collective_intro.tex} From 09a0f104aa7c5e035162de49e74751217473511c Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 28 Sep 2022 11:06:38 -0600 Subject: [PATCH 07/30] Sessions: remove unnecessary drafting artifacts --- main_spec.tex | 1 - utils/defs.tex | 1 - 2 files changed, 2 deletions(-) diff --git a/main_spec.tex b/main_spec.tex index 73a570be..f14a4f49 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -1,6 +1,5 @@ \documentclass[10pt,oneside]{book} - \input{utils/packages} \input{utils/defs} diff --git a/utils/defs.tex b/utils/defs.tex index 7e89f1af..4496b6fe 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -32,7 +32,6 @@ \def\colorswapot{\colorlet{saved}{.}\color{red}} 
\def\prevcolor{\color{saved}} -\newcommand{\newertext}[1]{\textcolor{blue}{#1}} \newcommand{\newtext}[1]{\textcolor{ForestGreen}{#1}} \newcommand{\oldtext}[1]{\textcolor{magenta}{\sout{#1}}} \newcommand{\insertDocVersion}{1.5} From 93f510e33d3158619fc7687bed8b7bb46c26e86d Mon Sep 17 00:00:00 2001 From: David Ozog Date: Mon, 10 Oct 2022 11:05:43 -0600 Subject: [PATCH 08/30] Sessions: address some feedback from reading --- content/sessions_intro.tex | 16 +++++++--------- content/shmem_session_start.tex | 6 +++--- content/shmem_session_stop.tex | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/content/sessions_intro.tex b/content/sessions_intro.tex index c769e1d9..d8554a84 100644 --- a/content/sessions_intro.tex +++ b/content/sessions_intro.tex @@ -11,15 +11,13 @@ These options serve only as \textit{hints} to the library; it is up to the implementation whether or not to apply any optimizations within a session. -Sessions do not affect the completion or ordering semantics of any \openshmem -routines in the program. -For this reason, routines such as non-blocking RMAs, non-blocking AMOs, -non-blocking \OPR{put-with-signal}, blocking scalar \OPR{puts}, and blocking -non-fetching AMOs are viable candidates for optimizations. -Other routines, such as blocking non-scalar \OPR{puts} and \OPR{gets}, blocking -fetching AMOs, blocking scalar \OPR{gets}, and the memory ordering routines -might require the library to enforce remote completion, reducing the potential -benefit of session optimizations. +Usage of the \openshmem session APIs on a particular context must comply with +the requirements of all options set on that context. +Starting and stoping \openshmem sessions should not affect the completion or +ordering semantics of any \openshmem routines in the program. +For these reasons, multi-threaded \openshmem programs may require additional +thread synchronization to ensure sessions hints are correctly applied to +shareable contexts. 
Because sessions are associated with an \openshmem communication context, routines not performed on a communication context (like collective routines) are ineligible for session hints. diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index ae4fe397..6bb5954b 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -9,12 +9,12 @@ \end{Csynopsis} \begin{apiarguments} - \apiargument{IN}{ctx}{A context handle specifying the context associated - with this session.} \apiargument{IN}{options}{The set of requested options from Table~\ref{session_opts} for this session. Multiple options may be requested by combining them with a bitwise OR operation; otherwise, \CONST{0} can be given if no options are requested.} + \apiargument{IN}{ctx}{A context handle specifying the context associated + with this session.} \end{apiarguments} \apidescription{ @@ -27,7 +27,7 @@ \FUNC{shmem\_session\_start} on that same context combines new options via a bitwise OR operation. Passing false or ambiguous \VAR{options} to a session should never result in - undefined behavior, but may result in the library aborting the program. + undefined behavior, but may result in poor library performance. } \apireturnvalues{ diff --git a/content/shmem_session_stop.tex b/content/shmem_session_stop.tex index d10b007c..acc94f1f 100644 --- a/content/shmem_session_stop.tex +++ b/content/shmem_session_stop.tex @@ -24,7 +24,7 @@ } \apinotes{ - Users are discourage from including non-\openshmem code, such as a long + Users are discouraged from including non-\openshmem code, such as a long computation loop, within a session without first calling \FUNC{shmem\_session\_stop}. 
} From 0a9873a6dfbd67613aa1970dedc4df3ceffcd9fd Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 17 Mar 2023 09:09:35 -0600 Subject: [PATCH 09/30] Sessions: rewording and edits based on WG feedback --- content/shmem_session_start.tex | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index 6bb5954b..d94d4ff2 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -26,8 +26,12 @@ If a session is already started on a given context, another call to \FUNC{shmem\_session\_start} on that same context combines new options via a bitwise OR operation. - Passing false or ambiguous \VAR{options} to a session should never result in - undefined behavior, but may result in poor library performance. + + No combination of \VAR{options} passed to \FUNC{shmem\_session\_start} + results in undefined behavior, but some combinations may be detrimental for + performance; for example, when selecting an option that is not applicable + to the session. It is the user's responsibility to determine which + combination of \VAR{options} benefits the performance of the session. } \apireturnvalues{ @@ -88,9 +92,6 @@ optimization. \FUNC{shmem\_session\_start} is non-collective, so there is no implied synchronization. - Implementations are encouraged to supply users with information about the - session options being applied or ignored; for instance, when - \LibConstRef{SHMEM\_DEBUG} is set. 
} \end{apidefinition} From c2dd73ef6053be4795be45374f6c9f4baaf7886b Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 17 Mar 2023 09:13:35 -0600 Subject: [PATCH 10/30] Update content/sessions_intro.tex Co-authored-by: James Dinan --- content/sessions_intro.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/sessions_intro.tex b/content/sessions_intro.tex index d8554a84..09d3c3c9 100644 --- a/content/sessions_intro.tex +++ b/content/sessions_intro.tex @@ -13,7 +13,7 @@ Usage of the \openshmem session APIs on a particular context must comply with the requirements of all options set on that context. -Starting and stoping \openshmem sessions should not affect the completion or +Starting and stopping \openshmem sessions should not affect the completion or ordering semantics of any \openshmem routines in the program. For these reasons, multi-threaded \openshmem programs may require additional thread synchronization to ensure sessions hints are correctly applied to From 96758c93dcd79a624ce25c40c98a48f96f7675e8 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 17 Mar 2023 09:13:53 -0600 Subject: [PATCH 11/30] Update content/backmatter.tex --- content/backmatter.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 846c7be9..60d7b7bc 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -677,7 +677,7 @@ \section{Version 1.6} \item Added the session routines, \FUNC{shmem\_session\_start} and \FUNC{shmem\_session\_stop}, which allow users to pass hints to the \openshmem library to apply runtime optimizations. -\ChangelogRef{subsec:session}% +\ChangelogRef{subsec:sessions}% \item Added fine grained completion routine: \FUNC{shmem\_pe\_quiet}. 
\ChangelogRef{subsec:shmem_pe_quiet}% % From 4616742db12cccd1cc546c517815fbe467c37546 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Mon, 10 Jul 2023 10:13:20 -0600 Subject: [PATCH 12/30] Sessions: better define "chain" and add an example --- content/shmem_session_start.tex | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index d94d4ff2..fd347d83 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -72,7 +72,10 @@ \LibConstDecl{SHMEM\_SESSION\_CHAIN} & \newline - The session will contain a chain (a trivial repeating pattern) of similar RMA operations. + The session will contain a \textit{chain} (i.e. a repeating sequence) of + RMA operations. For example, an iterative loop of only non-blocking RMA + operations forms a chain. A chain must not include a memory ordering or + collective operation. \tabularnewline \hline \LibConstDecl{SHMEM\_SESSION\_UNIFORM\_AMO} & From 641792507af320192cdabcca231d4c98fad90cb2 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 17 Nov 2023 11:48:41 -0500 Subject: [PATCH 13/30] rm extraneous sessions options, clarify "chaining" --- content/shmem_session_start.tex | 57 +++++++++++++++------------------ 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index fd347d83..efa038bc 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -38,52 +38,45 @@ None. } -\begin{longtable}{|p{0.45\textwidth}|p{0.5\textwidth}|} +\begin{longtable}{|p{0.40\textwidth}|p{0.55\textwidth}|} \hline \hline \textbf{Option} & \textbf{Usage hint} \tabularnewline \hline \endhead %% - \LibConstDecl{SHMEM\_SESSION\_OP\_PUT} & - \newline - The session will contain non-blocking \textit{put} and/or scalar put operations. 
- \tabularnewline \hline - - \LibConstDecl{SHMEM\_SESSION\_OP\_GET} & + \LibConstDecl{SHMEM\_SESSION\_CHAIN} & \newline - The session will contain non-blocking \textit{get} operations. - \tabularnewline \hline + The performance of \openshmem programs that issue many consecutive and + small-sized communication routines might be improved by combining these + routines into fewer operations. - \LibConstDecl{SHMEM\_SESSION\_OP\_PUT\_SIGNAL} & - \newline - The session will contain non-blocking \textit{put-with-signal} operations. - \tabularnewline \hline - - \LibConstDecl{SHMEM\_SESSION\_OP\_AMO} & - \newline - The session will contain non-fetching AMOs. - \tabularnewline \hline + The \VAR{SHMEM\_SESSION\_CHAIN} hint indicates to the \openshmem library + that the program intends to issue a series of operations on a communication + context that are suitable for chaining optimizations. An example of a + chain is an iterative loop of non-blocking RMA and/or AMO routines. A chain + must not include a memory ordering or collective operation. - \LibConstDecl{SHMEM\_SESSION\_OP\_AMO\_FETCH} & - \newline - The session will contain non-blocking fetching AMOs. - \tabularnewline \hline + Because sessions do not affect the completion or ordering semantics of any + \openshmem routines in the program, routines such as non-blocking RMAs, + non-blocking AMOs, non-blocking \OPR{put-with-signal}, blocking scalar + \OPR{puts}, and blocking non-fetching AMOs are viable candidates for + chaining optimizations. Other routines, such as blocking non-scalar + \OPR{puts} and \OPR{gets}, blocking fetching AMOs, blocking scalar + \OPR{gets}, and the memory ordering routines might require the library to + enforce remote completion, reducing the potential benefit of chaining + optimizations. - \LibConstDecl{SHMEM\_SESSION\_CHAIN} & - \newline - The session will contain a \textit{chain} (i.e. a repeating sequence) of - RMA operations. 
For example, an iterative loop of only non-blocking RMA - operations forms a chain. A chain must not include a memory ordering or - collective operation. \tabularnewline \hline \LibConstDecl{SHMEM\_SESSION\_UNIFORM\_AMO} & \newline - The session will contain a chain of AMOs that will not occur concurrently - across any different signal operators (i.e.~\ref{subsec:signal_operator}), - operations (\ref{sec:amo}), or types (Tables \ref{stdamotypes} and - \ref{extamotypes}). + The session will contain a chain (as defined by the + \VAR{SHMEM\_SESSION\_CHAIN} option) of only AMOs that will not occur + concurrently across any different signal operators + (i.e.~\ref{subsec:signal_operator}), operations (\ref{sec:amo}), or types + (Tables \ref{stdamotypes} and \ref{extamotypes}). + \tabularnewline \hline \TableCaptionRef{Session options} \label{session_opts} From bd37f7ca38f4beaa98c5fa7378a96366ccc10fcb Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 8 Dec 2023 20:17:20 -0500 Subject: [PATCH 14/30] sessions: swap shmem_session_start arguments --- content/shmem_session_start.tex | 6 +++--- example_code/shmem_session_example.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index efa038bc..c0c0f694 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -5,16 +5,16 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_session\_start}@(long options, shmem_ctx_t ctx); +void @\FuncDecl{shmem\_session\_start}@(shmem_ctx_t ctx, long options); \end{Csynopsis} \begin{apiarguments} + \apiargument{IN}{ctx}{A context handle specifying the context associated + with this session.} \apiargument{IN}{options}{The set of requested options from Table~\ref{session_opts} for this session. 
Multiple options may be requested by combining them with a bitwise OR operation; otherwise, \CONST{0} can be given if no options are requested.} - \apiargument{IN}{ctx}{A context handle specifying the context associated - with this session.} \end{apiarguments} \apidescription{ diff --git a/example_code/shmem_session_example.c b/example_code/shmem_session_example.c index 270533f2..7e194374 100644 --- a/example_code/shmem_session_example.c +++ b/example_code/shmem_session_example.c @@ -16,7 +16,7 @@ int main(void) { int npes = shmem_n_pes(); srand(mype); - shmem_session_start(SHMEM_SESSION_UNIFORM_AMO, SHMEM_CTX_DEFAULT); + shmem_session_start(SHMEM_CTX_DEFAULT, SHMEM_SESSION_UNIFORM_AMO); for (size_t i = 0; i < N_UPDATES; i++) { int random_pe = rand() % npes; From e0153701942ef48fcc61359572c7fb827e0d7744 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 8 Dec 2023 20:22:43 -0500 Subject: [PATCH 15/30] sessions: "chain together", not "optimizations" --- content/shmem_session_start.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index c0c0f694..518ac2be 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -51,9 +51,9 @@ small-sized communication routines might be improved by combining these routines into fewer operations. - The \VAR{SHMEM\_SESSION\_CHAIN} hint indicates to the \openshmem library - that the program intends to issue a series of operations on a communication - context that are suitable for chaining optimizations. An example of a + The \VAR{SHMEM\_SESSION\_CHAIN} hint indicates that a communication context + will be used to issue a series of operations that are suitable for the + \openshmem library to chain together. An example of a chain is an iterative loop of non-blocking RMA and/or AMO routines. A chain must not include a memory ordering or collective operation. 
From 242595b61e8726a78590f71fda83e38627b5276e Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 8 Dec 2023 20:26:38 -0500 Subject: [PATCH 16/30] sessions: small vs. large blocking puts for chains --- content/shmem_session_start.tex | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index 518ac2be..9a5713cc 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -59,13 +59,12 @@ Because sessions do not affect the completion or ordering semantics of any \openshmem routines in the program, routines such as non-blocking RMAs, - non-blocking AMOs, non-blocking \OPR{put-with-signal}, blocking scalar - \OPR{puts}, and blocking non-fetching AMOs are viable candidates for - chaining optimizations. Other routines, such as blocking non-scalar - \OPR{puts} and \OPR{gets}, blocking fetching AMOs, blocking scalar - \OPR{gets}, and the memory ordering routines might require the library to - enforce remote completion, reducing the potential benefit of chaining - optimizations. + non-blocking AMOs, non-blocking \OPR{put-with-signals}, blocking scalar + \OPR{puts}, small blocking \OPR{puts}, and blocking non-fetching AMOs are + viable candidates for chaining. Other routines, such as large blocking + \OPR{puts}, all blocking \OPR{gets}, blocking fetching AMOs, and the memory + ordering routines might require the library to enforce completions, + reducing the potential benefit of chaining. 
\tabularnewline \hline From ff3a2dba6401194289347cfc4cf848bed76555ff Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 8 Dec 2023 20:27:48 -0500 Subject: [PATCH 17/30] sessions: Say "HPC Challenge benchmark" not "GUPS" --- content/shmem_session_stop.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_session_stop.tex b/content/shmem_session_stop.tex index acc94f1f..6e985721 100644 --- a/content/shmem_session_stop.tex +++ b/content/shmem_session_stop.tex @@ -36,7 +36,7 @@ {The following example demonstrates the usage of \FUNC{shmem\_session\_start} and \FUNC{shmem\_session\_stop} with a loop of random atomic non-fetching XOR updates to a distributed table, similar to - the Giga-updates per second (GUPS) microbenchmark + the HPC Challenge RandomAccess GUPS (Giga-updates per second) benchmark \footnote{http://icl.cs.utk.edu/projectsfiles/hpcc/RandomAccess/}.} {./example_code/shmem_session_example.c} {} From 94dd7c11833de094b1cd09e52f1b95bb4be563d3 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 8 Dec 2023 20:51:57 -0500 Subject: [PATCH 18/30] sessions: add quiet & comments to sessions example --- example_code/shmem_session_example.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/example_code/shmem_session_example.c b/example_code/shmem_session_example.c index 7e194374..3e93563d 100644 --- a/example_code/shmem_session_example.c +++ b/example_code/shmem_session_example.c @@ -27,6 +27,11 @@ int main(void) { shmem_session_stop(SHMEM_CTX_DEFAULT); + /* shmem_session_stop does not quiet the context or synchronize */ + shmem_barrier_all(); + + /* Check the table result here... 
*/ + shmem_free(table); shmem_finalize(); return 0; From 7a6f600d3caa71759b5a0105b3c515981e19ad2c Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 20 Dec 2023 14:55:16 -0500 Subject: [PATCH 19/30] sessions: no-op if ctx equals SHMEM_CTX_INVALID --- content/shmem_session_start.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index 9a5713cc..208733a6 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -26,6 +26,8 @@ If a session is already started on a given context, another call to \FUNC{shmem\_session\_start} on that same context combines new options via a bitwise OR operation. + If \VAR{ctx} compares equal to \LibConstRef{SHMEM\_CTX\_INVALID} then + \FUNC{shmem\_session\_start} performs no action and returns immediately. No combination of \VAR{options} passed to \FUNC{shmem\_session\_start} results in undefined behavior, but some combinations may be detrimental for From 9e24b470f681494a56b29d62265e1c41f642a1f2 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 20 Dec 2023 14:57:57 -0500 Subject: [PATCH 20/30] sessions: redo options table, rename chain->batch --- content/shmem_session_start.tex | 54 ++++++++++++++++----------------- utils/defs.tex | 20 ++++++++++++ 2 files changed, 46 insertions(+), 28 deletions(-) diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index 208733a6..dec88bda 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -40,48 +40,46 @@ None. } -\begin{longtable}{|p{0.40\textwidth}|p{0.55\textwidth}|} - \hline - \hline - \textbf{Option} & \textbf{Usage hint} - \tabularnewline \hline - \endhead - %% - \LibConstDecl{SHMEM\_SESSION\_CHAIN} & - \newline - The performance of \openshmem programs that issue many consecutive and - small-sized communication routines might be improved by combining these - routines into fewer operations. 
+\sessiontablebegin - The \VAR{SHMEM\_SESSION\_CHAIN} hint indicates that a communication context - will be used to issue a series of operations that are suitable for the - \openshmem library to chain together. An example of a - chain is an iterative loop of non-blocking RMA and/or AMO routines. A chain +\sessiontablerow{\LibConstRef{SHMEM\_SESSION\_BATCH}}{ + A \textit{batch} is a series of calls to OpenSHMEM routines, which occur + within a session in program order, that might tolerate an increase in + individual call latencies at the opportunity to decrease the overall + overhead typically involved with the OpenSHMEM library implementing the + series as individual RMA operations. + In other words, the performance of \openshmem programs that issue many + consecutive and small-sized RMA routines (called a \textit{batch}) might be + improved by informing the library implementation ahead of time that it is + free to delay initiating the operations in order to combine or coalesce the + issued \openshmem routines. + The specific mechanisms for improving performance using batching + optimizations depend on the \openshmem library implementation. + + The \VAR{SHMEM\_SESSION\_BATCH} hint indicates that a communication context + will be used to issue a batch. An example of a + batch is an iterative loop of non-blocking RMA and/or AMO routines. A batch must not include a memory ordering or collective operation. Because sessions do not affect the completion or ordering semantics of any \openshmem routines in the program, routines such as non-blocking RMAs, non-blocking AMOs, non-blocking \OPR{put-with-signals}, blocking scalar \OPR{puts}, small blocking \OPR{puts}, and blocking non-fetching AMOs are - viable candidates for chaining. Other routines, such as large blocking + viable candidates for batching. 
Other routines, such as large blocking \OPR{puts}, all blocking \OPR{gets}, blocking fetching AMOs, and the memory ordering routines might require the library to enforce completions, - reducing the potential benefit of chaining. - - \tabularnewline \hline + reducing the potential benefit of batching. + } \hline - \LibConstDecl{SHMEM\_SESSION\_UNIFORM\_AMO} & - \newline - The session will contain a chain (as defined by the - \VAR{SHMEM\_SESSION\_CHAIN} option) of only AMOs that will not occur +\sessiontablerow{\LibConstRef{SHMEM\_SESSION\_SAME\_AMO}}{ + The session will contain a batch (as defined by the + \VAR{SHMEM\_SESSION\_BATCH} option) of only AMOs that will not occur concurrently across any different signal operators (i.e.~\ref{subsec:signal_operator}), operations (\ref{sec:amo}), or types (Tables \ref{stdamotypes} and \ref{extamotypes}). + } \hline - \tabularnewline \hline - \TableCaptionRef{Session options} - \label{session_opts} -\end{longtable} +\sessiontableend \apinotes{ The \FUNC{shmem\_session\_start} routine provides hints for improving diff --git a/utils/defs.tex b/utils/defs.tex index 4496b6fe..b429e663 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -372,6 +372,26 @@ \end{tabular}\\ } +\newcommand{\sessiontablebegin} { +\begin{table}[h] +\hspace{-0.5cm} +\begin{tabular}{|p{4.8cm}|p{12cm}|} + \hline + \textbf{Option} & \textbf{Usage hint} + \tabularnewline \hline +} + +\newcommand{\sessiontableend} { +\end{tabular} +\TableCaptionRef{Session options} +\label{session_opts} +\end{table} +} + +\newcommand{\sessiontablerow}[2]{ + #1 & #2 \tabularnewline +} + \newcommand{\apinotes}[1]{ \item[Notes] \hfill \\ #1 From 7034807e85744b9ff3b171ba1178dc6a2c4e169d Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 20 Dec 2023 15:00:18 -0500 Subject: [PATCH 21/30] sessions: note "small" depends on implementation --- content/shmem_session_start.tex | 3 +++ 1 file changed, 3 insertions(+) diff --git a/content/shmem_session_start.tex 
b/content/shmem_session_start.tex index dec88bda..d87a3c58 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -87,6 +87,9 @@ optimization. \FUNC{shmem\_session\_start} is non-collective, so there is no implied synchronization. + Blocking puts must be sufficiently small to benefit from batching, and the + exact threshold for this benefit depends on the \openshmem implementation + and/or the application. } \end{apidefinition} From 4a0e4820591c6cc022410fba3658583bdf5519eb Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 20 Dec 2023 15:01:15 -0500 Subject: [PATCH 22/30] sessions: update code example based on WG feedback --- content/shmem_session_stop.tex | 2 +- example_code/shmem_session_example.c | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/content/shmem_session_stop.tex b/content/shmem_session_stop.tex index 6e985721..f39cb784 100644 --- a/content/shmem_session_stop.tex +++ b/content/shmem_session_stop.tex @@ -33,7 +33,7 @@ \begin{apiexamples} \apicexample - {The following example demonstrates the usage of + {The following \Cstd[11] program demonstrates the usage of \FUNC{shmem\_session\_start} and \FUNC{shmem\_session\_stop} with a loop of random atomic non-fetching XOR updates to a distributed table, similar to the HPC Challenge RandomAccess GUPS (Giga-updates per second) benchmark diff --git a/example_code/shmem_session_example.c b/example_code/shmem_session_example.c index 3e93563d..9ebb8890 100644 --- a/example_code/shmem_session_example.c +++ b/example_code/shmem_session_example.c @@ -16,22 +16,25 @@ int main(void) { int npes = shmem_n_pes(); srand(mype); - shmem_session_start(SHMEM_CTX_DEFAULT, SHMEM_SESSION_UNIFORM_AMO); + shmem_ctx_t ctx; /* 'ctx' exists only to explain session completions, so */ + shmem_ctx_create(0, &ctx); /* the return value of shmem_ctx_create() is ignored. 
*/ + + shmem_session_start(ctx, SHMEM_SESSION_SAME_AMO); for (size_t i = 0; i < N_UPDATES; i++) { int random_pe = rand() % npes; size_t random_idx = rand() % N_INDICES; uint64_t random_val = rand() % N_VALUES; - shmem_uint64_atomic_xor(&table[random_idx], random_val, random_pe); + shmem_uint64_atomic_xor(ctx, &table[random_idx], random_val, random_pe); } - shmem_session_stop(SHMEM_CTX_DEFAULT); - - /* shmem_session_stop does not quiet the context or synchronize */ - shmem_barrier_all(); - - /* Check the table result here... */ + shmem_session_stop(ctx); /* shmem_session_stop() does not quiet the context or */ + shmem_ctx_quiet(ctx); /* synchronize. If this were SHMEM_CTX_DEFAULT, one */ + shmem_sync_all(); /* could simply call shmem_barrier_all() instead of */ + /* shmem_ctx_quiet() followed by shmem_sync_all(). */ + /* At this point, it is safe to check and/or validate the table result... */ + shmem_ctx_destroy(ctx); shmem_free(table); shmem_finalize(); return 0; From 30ce68e852d59214d33a7ccb5b9bce34eb18d9f3 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 17 Jan 2024 10:54:57 -0500 Subject: [PATCH 23/30] sessions/example: check ctx_create return & clean --- example_code/shmem_session_example.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/example_code/shmem_session_example.c b/example_code/shmem_session_example.c index 9ebb8890..de37815e 100644 --- a/example_code/shmem_session_example.c +++ b/example_code/shmem_session_example.c @@ -16,8 +16,12 @@ int main(void) { int npes = shmem_n_pes(); srand(mype); - shmem_ctx_t ctx; /* 'ctx' exists only to explain session completions, so */ - shmem_ctx_create(0, &ctx); /* the return value of shmem_ctx_create() is ignored. 
*/ + shmem_ctx_t ctx; + int ret = shmem_ctx_create(0, &ctx); + if (ret != 0) { + printf("%d: Error creating context (%d)\n", mype, ret); + shmem_global_exit(1); + } shmem_session_start(ctx, SHMEM_SESSION_SAME_AMO); @@ -28,11 +32,11 @@ int main(void) { shmem_uint64_atomic_xor(ctx, &table[random_idx], random_val, random_pe); } - shmem_session_stop(ctx); /* shmem_session_stop() does not quiet the context or */ - shmem_ctx_quiet(ctx); /* synchronize. If this were SHMEM_CTX_DEFAULT, one */ - shmem_sync_all(); /* could simply call shmem_barrier_all() instead of */ - /* shmem_ctx_quiet() followed by shmem_sync_all(). */ - /* At this point, it is safe to check and/or validate the table result... */ + shmem_session_stop(ctx); + shmem_ctx_quiet(ctx); /* shmem_session_stop() does not quiet the context. */ + shmem_sync_all(); /* shmem_session_stop() does not synchronize. */ + + /* At this point, it is safe to check and/or validate the table result... */ shmem_ctx_destroy(ctx); shmem_free(table); From caacb66855de36e7e3cedbfedcdc355d2bb3f4dc Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 18 Jan 2024 12:28:03 -0500 Subject: [PATCH 24/30] sessions/example: add config struct and mask --- content/sessions_intro.tex | 3 + content/shmem_session_config_t.tex | 93 ++++++++++++++++++++++++++++ content/shmem_session_start.tex | 85 ++++++++++++++++++------- example_code/shmem_session_example.c | 7 ++- main_spec.tex | 3 + 5 files changed, 167 insertions(+), 24 deletions(-) create mode 100644 content/shmem_session_config_t.tex diff --git a/content/sessions_intro.tex b/content/sessions_intro.tex index 09d3c3c9..73ae017e 100644 --- a/content/sessions_intro.tex +++ b/content/sessions_intro.tex @@ -21,3 +21,6 @@ Because sessions are associated with an \openshmem communication context, routines not performed on a communication context (like collective routines) are ineligible for session hints. 
+ +The \FUNC{shmem\_session\_config\_t} object requires the \CONST{SIZE\_MAX} macro +defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.3 and \Cstd[11]~\S7.20.3. diff --git a/content/shmem_session_config_t.tex b/content/shmem_session_config_t.tex new file mode 100644 index 00000000..58d997d1 --- /dev/null +++ b/content/shmem_session_config_t.tex @@ -0,0 +1,93 @@ +\apisummary{ + A structure type representing communication session configuration arguments +} + +\begin{apidefinition} + +\begin{Csynopsis} +typedef struct { + size_t total_ops; + size_t completion_rate; +} shmem_session_config_t; +\end{Csynopsis} + +\begin{apiarguments} + None. +\end{apiarguments} + + +\apidescription{ + A communication session configuration object is provided as an argument to + the \FUNC{shmem\_session\_start} routine. + The \VAR{shmem\_session\_config\_t} object contains optional parameters + that are associated with the options of a communication session. + These parameters serve only as \textit{hints} to the library; it is up to + the implementation whether or not to use the parameter values within + a session. + + The \VAR{total\_ops} member indicates the expected maximum number of all + calls to \openshmem RMA routines within the session (i.e., after a call to + \FUNC{shmem\_session\_start} and before a corresponding call to + \FUNC{shmem\_session\_stop}). + If \VAR{total\_ops} is lower than the actual number of calls to \openshmem + RMA routines within the session, then application performance may be + suboptimal; however, the result of any data transfers, completions, or + memory ordering operations are unaffected by the value of + \FUNC{total\_ops}. 
+ + The \VAR{completion\_rate} member indicates the expected number of + \openshmem RMA operations to buffer, combine, and/or coalesce (see + \LibConstRef{SHMEM\_SESSION\_BATCH} option to + \FUNC{shmem\_session\_start} in \ref{subsec:shmem_session_start}) before + the library enforces local completion of all pending RMA routines in the + session. + The value of \VAR{completion\_rate} is expected to be less than the value + of \VAR{total\_ops}; however, the result of any data transfers, + completions, or memory ordering operations are unaffected by the value of + \FUNC{completion\_rate}. + + When using the configuration structure to start a communication session, a + mask parameter controls which fields may be accessed by the \openshmem + library. + Any configuration parameter value that is not indicated in the mask will be + ignored, and the default value will be used instead. + Therefore, a program must set only the fields for which it does not want + the default value. + + A configuration mask is created through a bitwise OR operation of the + following library constants. + A configuration mask value of \CONST{0} indicates that the team + should be created with the default values for all configuration + parameters. + + \apitablerow{\LibConstRef{SHMEM\_SESSION\_TOTAL\_OPS}}{ + The value of the \VAR{total\_ops} member of the \VAR{config} structure is + unmasked within the session and applied as a hint. + } + + \apitablerow{\LibConstRef{SHMEM\_SESSION\_COMPLETION\_RATE}}{ + \vspace{2mm} + The value of the \VAR{completion\_rate} member of the \VAR{config} + structure is unmasked within the session and applied as a hint. + } + + The default values for configuration parameters are: + + \apitablerow{\VAR{total\_ops} = \CONST{SIZE\_MAX}}{ + By default, the expected maximum number of calls to \openshmem + RMA routines is set to the upper bound of a \VAR{size\_t} variable, + \VAR{SIZE\_MAX}. 
This is a representative constant that indicates the + \openshmem library is free to select any value appropriate for the + implementation. + } + + \apitablerow{\VAR{completion\_rate} = \CONST{SIZE\_MAX}}{ + By default, the number of expected number of \openshmem RMA calls to + buffer, combine, and/or coalesce is set to the upper bound of a + \VAR{size\_t} variable, \VAR{SIZE\_MAX}. This is a representative + constant that indicates the \openshmem library is free to select any + value appropriate for the implementation. + } +} + +\end{apidefinition} diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index d87a3c58..180d7c74 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -5,7 +5,7 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_session\_start}@(shmem_ctx_t ctx, long options); +void @\FuncDecl{shmem\_session\_start}@(shmem_ctx_t ctx, long options, const shmem_session_config_t *config, long config_mask); \end{Csynopsis} \begin{apiarguments} @@ -15,6 +15,11 @@ Table~\ref{session_opts} for this session. Multiple options may be requested by combining them with a bitwise OR operation; otherwise, \CONST{0} can be given if no options are requested.} + \apiargument{IN}{config}{ + A pointer to the configuration parameters for the session.} + \apiargument{IN}{config\_mask}{ + The bitwise mask representing the set of configuration parameters to use + from \VAR{config}.} \end{apiarguments} \apidescription{ @@ -24,8 +29,11 @@ Sessions on a communication context must be stopped with a call to \FUNC{shmem\_session\_stop} on the same context. If a session is already started on a given context, another call to - \FUNC{shmem\_session\_start} on that same context combines new options via a - bitwise OR operation. + \FUNC{shmem\_session\_start} on that same context combines new options + via a bitwise OR operation. 
In such a case, unmasked member values in the + \VAR{config} argument replace any existing configuration values that are + already applied to the session. + If \VAR{ctx} compares equal to \LibConstRef{SHMEM\_CTX\_INVALID} then \FUNC{shmem\_session\_start} performs no action and returns immediately. @@ -34,6 +42,16 @@ performance; for example, when selecting an option that is not applicable to the session. It is the user's responsibility to determine which combination of \VAR{options} benefits the performance of the session. + + The \VAR{config} argument specifies session configuration parameters, + which are described in Section~\ref{subsec:shmem_session_config_t}. + + The \VAR{config\_mask} argument is a bitwise mask representing the set of + configuration parameters to use from \VAR{config}. + A \VAR{config\_mask} value of \CONST{0} indicates that the session should + be started with the default values for all configuration parameters. + See Section~\ref{subsec:shmem_session_config_t} for field mask names and + default configuration parameters. } \apireturnvalues{ @@ -43,32 +61,46 @@ \sessiontablebegin \sessiontablerow{\LibConstRef{SHMEM\_SESSION\_BATCH}}{ - A \textit{batch} is a series of calls to OpenSHMEM routines, which occur - within a session in program order, that might tolerate an increase in - individual call latencies at the opportunity to decrease the overall - overhead typically involved with the OpenSHMEM library implementing the - series as individual RMA operations. - In other words, the performance of \openshmem programs that issue many - consecutive and small-sized RMA routines (called a \textit{batch}) might be - improved by informing the library implementation ahead of time that it is - free to delay initiating the operations in order to combine or coalesce the - issued \openshmem routines. - The specific mechanisms for improving performance using batching - optimizations depend on the \openshmem library implementation. 
- - The \VAR{SHMEM\_SESSION\_BATCH} hint indicates that a communication context - will be used to issue a batch. An example of a - batch is an iterative loop of non-blocking RMA and/or AMO routines. A batch - must not include a memory ordering or collective operation. + A \textit{batch} is a series of calls to \openshmem routines that occur + within a session on a communication context (i.e., after a call to + \FUNC{shmem\_session\_start} and before a corresponding call to + \FUNC{shmem\_session\_stop}), that might tolerate an increase in + individual call latencies. Designating a batch may provide an opportunity + to decrease the overall overhead typically involved with the \openshmem + library implementing the series as individual RMA operations. In other + words, the performance of \openshmem programs that issue many consecutive + and small-sized RMA routines might be improved by informing the library + implementation ahead of time that it is free to delay transferring data + in order to buffer, combine, and/or coalesce the issued \openshmem + routines. The specific mechanisms for improving performance using + batching optimizations depend on the \openshmem library implementation. + + The \VAR{SHMEM\_SESSION\_BATCH} hint indicates that a communication + context will be used to issue a batch. An example of a batch is an + iterative loop of non-blocking RMA and/or AMO routines. A batch may + include a memory ordering or collective operation, but such routines + might require completions and/or synchronization that could degrade + performance. Because sessions do not affect the completion or ordering semantics of any \openshmem routines in the program, routines such as non-blocking RMAs, non-blocking AMOs, non-blocking \OPR{put-with-signals}, blocking scalar \OPR{puts}, small blocking \OPR{puts}, and blocking non-fetching AMOs are viable candidates for batching. 
Other routines, such as large blocking - \OPR{puts}, all blocking \OPR{gets}, blocking fetching AMOs, and the memory - ordering routines might require the library to enforce completions, - reducing the potential benefit of batching. + \OPR{puts}, all blocking \OPR{gets}, blocking fetching AMOs, and the + memory ordering routines might require the library to enforce + completions, reducing the potential benefit of batching. + + The \VAR{total\_ops} field of \VAR{config} with mask value + \LibConstRef{SHMEM\_SESSION\_TOTAL\_OPS} indicates the expected maximum + number of calls to \openshmem RMA routines within the session. + + The \VAR{completion\_rate} field of \VAR{config} with mask value + \LibConstRef{SHMEM\_SESSION\_COMPLETION\_RATE} indicates a desired + number of \openshmem RMA operations to buffer, combine, and/or coalesce + before the library enforces local completion of all pending RMA routines + in the session. See Section~\ref{subsec:shmem_session_config_t} for + details about these parameters. } \hline \sessiontablerow{\LibConstRef{SHMEM\_SESSION\_SAME\_AMO}}{ @@ -77,6 +109,13 @@ concurrently across any different signal operators (i.e.~\ref{subsec:signal_operator}), operations (\ref{sec:amo}), or types (Tables \ref{stdamotypes} and \ref{extamotypes}). + + The same members of the \VAR{config} structure that apply to the + \LibConstRef{SHMEM\_SESSION\_BATCH} option (\VAR{total\_ops} and + \VAR{completion\_rate}) also apply to the + \LibConstRef{SHMEM\_SESSION\_SAME\_AMO} option. See + Section~\ref{subsec:shmem_session_config_t} for details about these + parameters. 
} \hline \sessiontableend diff --git a/example_code/shmem_session_example.c b/example_code/shmem_session_example.c index de37815e..045d0ba0 100644 --- a/example_code/shmem_session_example.c +++ b/example_code/shmem_session_example.c @@ -23,7 +23,12 @@ int main(void) { shmem_global_exit(1); } - shmem_session_start(ctx, SHMEM_SESSION_SAME_AMO); + shmem_config_t config; + long config_mask; + config.total_ops = N_UPDATES; + config_mask = SHMEM_SESSION_TOTAL_OPS; + + shmem_session_start(ctx, SHMEM_SESSION_SAME_AMO, config, config_mask); for (size_t i = 0; i < N_UPDATES; i++) { int random_pe = rand() % npes; diff --git a/main_spec.tex b/main_spec.tex index f14a4f49..c81de0cd 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -360,6 +360,9 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} \subsection{Session Routines}\label{subsec:sessions} \input{content/sessions_intro.tex} +\subsubsection{\textbf{SHMEM\_SESSION\_CONFIG\_T}}\label{subsec:shmem_session_config_t} +\input{content/shmem_session_config_t.tex} + \subsubsection{\textbf{SHMEM\_SESSION\_START}}\label{subsec:shmem_session_start} \input{content/shmem_session_start.tex} From 6d0f714096f64accd76ecb92564e6795429c41f9 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Tue, 13 Feb 2024 08:20:22 -0500 Subject: [PATCH 25/30] sessions: rename completion_rate to delivery_rate --- content/shmem_session_config_t.tex | 39 ++++++++++++++++++++---------- content/shmem_session_start.tex | 14 +++++------ 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/content/shmem_session_config_t.tex b/content/shmem_session_config_t.tex index 58d997d1..2d748567 100644 --- a/content/shmem_session_config_t.tex +++ b/content/shmem_session_config_t.tex @@ -7,7 +7,7 @@ \begin{Csynopsis} typedef struct { size_t total_ops; - size_t completion_rate; + size_t delivery_rate; } shmem_session_config_t; \end{Csynopsis} @@ -25,29 +25,29 @@ the implementation whether or not to use the parameter values within a 
session. - The \VAR{total\_ops} member indicates the expected maximum number of all + The \VAR{total\_ops} member indicates the expected maximum number of all calls to \openshmem RMA routines within the session (i.e., after a call to \FUNC{shmem\_session\_start} and before a corresponding call to \FUNC{shmem\_session\_stop}). If \VAR{total\_ops} is lower than the actual number of calls to \openshmem - RMA routines within the session, then application performance may be + RMA routines within the session, then application performance might be suboptimal; however, the result of any data transfers, completions, or memory ordering operations are unaffected by the value of \FUNC{total\_ops}. - The \VAR{completion\_rate} member indicates the expected number of + The \VAR{delivery\_rate} member indicates the expected number of \openshmem RMA operations to buffer, combine, and/or coalesce (see \LibConstRef{SHMEM\_SESSION\_BATCH} option to - \FUNC{shmem\_session\_start} in \ref{subsec:shmem_session_start}) before - the library enforces local completion of all pending RMA routines in the - session. - The value of \VAR{completion\_rate} is expected to be less than the value + \FUNC{shmem\_session\_start} within \ref{subsec:shmem_session_start}) before + the library delivers all pending RMA routines in the + session to the network. + The value of \VAR{delivery\_rate} is expected to be less than the value of \VAR{total\_ops}; however, the result of any data transfers, completions, or memory ordering operations are unaffected by the value of - \FUNC{completion\_rate}. + \FUNC{delivery\_rate}. When using the configuration structure to start a communication session, a - mask parameter controls which fields may be accessed by the \openshmem + mask parameter controls which fields are accessed by the \openshmem library. Any configuration parameter value that is not indicated in the mask will be ignored, and the default value will be used instead. 
@@ -65,9 +65,9 @@ unmasked within the session and applied as a hint. } - \apitablerow{\LibConstRef{SHMEM\_SESSION\_COMPLETION\_RATE}}{ + \apitablerow{\LibConstRef{SHMEM\_SESSION\_DELIVERY\_RATE}}{ \vspace{2mm} - The value of the \VAR{completion\_rate} member of the \VAR{config} + The value of the \VAR{delivery\_rate} member of the \VAR{config} structure is unmasked within the session and applied as a hint. } @@ -81,7 +81,7 @@ implementation. } - \apitablerow{\VAR{completion\_rate} = \CONST{SIZE\_MAX}}{ + \apitablerow{\VAR{delivery\_rate} = \CONST{SIZE\_MAX}}{ By default, the number of expected number of \openshmem RMA calls to buffer, combine, and/or coalesce is set to the upper bound of a \VAR{size\_t} variable, \VAR{SIZE\_MAX}. This is a representative @@ -90,4 +90,17 @@ } } +\apinotes{ + Users are discouraged from calling \FUNC{shmem\_fence}, + \FUNC{shmem\_cxt\_fence}, \FUNC{shmem\_quiet}, or + \FUNC{shmem\_ctx\_quiet} routines within a session when tolerant, because + the library must impose strict completions to comply with ordering + semantics. Users are instead encouraged to provide the \FUNC{total\_ops} + and \FUNC{delivery\_rate} hints to influence how the library pipelines + batched operations (see \LibConstRef{SHMEM\_SESSION\_BATCH} option to + \FUNC{shmem\_session\_start} in \ref{subsec:shmem_session_start}). + However, only quiet and fence operations guarantee that in-use data + buffers can be updated after previously invoked RMA routines. +} + \end{apidefinition} diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index 180d7c74..36d276a9 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -95,12 +95,12 @@ \LibConstRef{SHMEM\_SESSION\_TOTAL\_OPS} indicates the expected maximum number of calls to \openshmem RMA routines within the session. 
- The \VAR{completion\_rate} field of \VAR{config} with mask value - \LibConstRef{SHMEM\_SESSION\_COMPLETION\_RATE} indicates a desired - number of \openshmem RMA operations to buffer, combine, and/or coalesce - before the library enforces local completion of all pending RMA routines - in the session. See Section~\ref{subsec:shmem_session_config_t} for - details about these parameters. + The \VAR{delivery\_rate} field of \VAR{config} with mask value + \LibConstRef{SHMEM\_SESSION\_DELIVERY\_RATE} indicates a desired number + of \openshmem RMA operations to buffer, combine, and/or coalesce before + the library delivers all pending RMA routines in the session to the + network. See Section~\ref{subsec:shmem_session_config_t} for details + about these parameters. } \hline \sessiontablerow{\LibConstRef{SHMEM\_SESSION\_SAME\_AMO}}{ @@ -112,7 +112,7 @@ The same members of the \VAR{config} structure that apply to the \LibConstRef{SHMEM\_SESSION\_BATCH} option (\VAR{total\_ops} and - \VAR{completion\_rate}) also apply to the + \VAR{delivery\_rate}) also apply to the \LibConstRef{SHMEM\_SESSION\_SAME\_AMO} option. See Section~\ref{subsec:shmem_session_config_t} for details about these parameters. 
From 933f8ffcd61ffa62b9462424ec0fe61542591e61 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Wed, 10 Apr 2024 13:26:54 -0400 Subject: [PATCH 26/30] sessions: remove delivery_rate config_t parameter --- content/shmem_session_config_t.tex | 56 ++++++++---------------------- content/shmem_session_start.tex | 12 ++----- 2 files changed, 18 insertions(+), 50 deletions(-) diff --git a/content/shmem_session_config_t.tex b/content/shmem_session_config_t.tex index 2d748567..250c061a 100644 --- a/content/shmem_session_config_t.tex +++ b/content/shmem_session_config_t.tex @@ -7,7 +7,6 @@ \begin{Csynopsis} typedef struct { size_t total_ops; - size_t delivery_rate; } shmem_session_config_t; \end{Csynopsis} @@ -29,22 +28,11 @@ calls to \openshmem RMA routines within the session (i.e., after a call to \FUNC{shmem\_session\_start} and before a corresponding call to \FUNC{shmem\_session\_stop}). - If \VAR{total\_ops} is lower than the actual number of calls to \openshmem - RMA routines within the session, then application performance might be - suboptimal; however, the result of any data transfers, completions, or - memory ordering operations are unaffected by the value of - \FUNC{total\_ops}. - - The \VAR{delivery\_rate} member indicates the expected number of - \openshmem RMA operations to buffer, combine, and/or coalesce (see - \LibConstRef{SHMEM\_SESSION\_BATCH} option to - \FUNC{shmem\_session\_start} within \ref{subsec:shmem_session_start}) before - the library delivers all pending RMA routines in the - session to the network. - The value of \VAR{delivery\_rate} is expected to be less than the value - of \VAR{total\_ops}; however, the result of any data transfers, + If \VAR{total\_ops} is lower than the \textit{actual} number of calls to + \openshmem RMA routines within the session, then application performance + might be suboptimal; however, the result of any data transfers, completions, or memory ordering operations are unaffected by the value of - \FUNC{delivery\_rate}. 
+ \FUNC{total\_ops}. When using the configuration structure to start a communication session, a mask parameter controls which fields are accessed by the \openshmem @@ -56,8 +44,8 @@ A configuration mask is created through a bitwise OR operation of the following library constants. - A configuration mask value of \CONST{0} indicates that the team - should be created with the default values for all configuration + A configuration mask value of \CONST{0} indicates that the session + should be started with the default values for all configuration parameters. \apitablerow{\LibConstRef{SHMEM\_SESSION\_TOTAL\_OPS}}{ @@ -65,12 +53,6 @@ unmasked within the session and applied as a hint. } - \apitablerow{\LibConstRef{SHMEM\_SESSION\_DELIVERY\_RATE}}{ - \vspace{2mm} - The value of the \VAR{delivery\_rate} member of the \VAR{config} - structure is unmasked within the session and applied as a hint. - } - The default values for configuration parameters are: \apitablerow{\VAR{total\_ops} = \CONST{SIZE\_MAX}}{ @@ -80,27 +62,19 @@ \openshmem library is free to select any value appropriate for the implementation. } - - \apitablerow{\VAR{delivery\_rate} = \CONST{SIZE\_MAX}}{ - By default, the number of expected number of \openshmem RMA calls to - buffer, combine, and/or coalesce is set to the upper bound of a - \VAR{size\_t} variable, \VAR{SIZE\_MAX}. This is a representative - constant that indicates the \openshmem library is free to select any - value appropriate for the implementation. - } } \apinotes{ Users are discouraged from calling \FUNC{shmem\_fence}, - \FUNC{shmem\_cxt\_fence}, \FUNC{shmem\_quiet}, or - \FUNC{shmem\_ctx\_quiet} routines within a session when tolerant, because - the library must impose strict completions to comply with ordering - semantics. 
Users are instead encouraged to provide the \FUNC{total\_ops} - and \FUNC{delivery\_rate} hints to influence how the library pipelines - batched operations (see \LibConstRef{SHMEM\_SESSION\_BATCH} option to - \FUNC{shmem\_session\_start} in \ref{subsec:shmem_session_start}). - However, only quiet and fence operations guarantee that in-use data - buffers can be updated after previously invoked RMA routines. + \FUNC{shmem\_cxt\_fence}, \FUNC{shmem\_quiet}, or \FUNC{shmem\_ctx\_quiet} + routines within a session whenever possible, because the library must + impose strict completions to comply with ordering semantics. + However, hints provided by \FUNC{shmem\_session\_config\_t} do not imply + the occurrence of any completion or memory ordering operations. + The requirements on buffers provided to \openshmem routines that are + \textit{in-use} (as described in Section + \ref{subsec:invoking_openshmem_operations}) apply regardless of any + \FUNC{shmem\_session\_config\_t} hints. } \end{apidefinition} diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index 36d276a9..838cb19b 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -94,13 +94,8 @@ The \VAR{total\_ops} field of \VAR{config} with mask value \LibConstRef{SHMEM\_SESSION\_TOTAL\_OPS} indicates the expected maximum number of calls to \openshmem RMA routines within the session. - - The \VAR{delivery\_rate} field of \VAR{config} with mask value - \LibConstRef{SHMEM\_SESSION\_DELIVERY\_RATE} indicates a desired number - of \openshmem RMA operations to buffer, combine, and/or coalesce before - the library delivers all pending RMA routines in the session to the - network. See Section~\ref{subsec:shmem_session_config_t} for details - about these parameters. + See Section~\ref{subsec:shmem_session_config_t} for details + about \VAR{shmem\_session\_config\_t} parameters.
} \hline \sessiontablerow{\LibConstRef{SHMEM\_SESSION\_SAME\_AMO}}{ @@ -111,8 +106,7 @@ (Tables \ref{stdamotypes} and \ref{extamotypes}). The same members of the \VAR{config} structure that apply to the - \LibConstRef{SHMEM\_SESSION\_BATCH} option (\VAR{total\_ops} and - \VAR{delivery\_rate}) also apply to the + \LibConstRef{SHMEM\_SESSION\_BATCH} option also apply to the \LibConstRef{SHMEM\_SESSION\_SAME\_AMO} option. See Section~\ref{subsec:shmem_session_config_t} for details about these parameters. From aa84edeb9cba7b8ce80c21885e115e5b48146ba9 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 12 Apr 2024 14:04:30 -0400 Subject: [PATCH 27/30] sessions: improve language regarding config_t hint --- content/shmem_session_config_t.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_session_config_t.tex b/content/shmem_session_config_t.tex index 250c061a..5982da7b 100644 --- a/content/shmem_session_config_t.tex +++ b/content/shmem_session_config_t.tex @@ -28,7 +28,7 @@ calls to \openshmem RMA routines within the session (i.e., after a call to \FUNC{shmem\_session\_start} and before a corresponding call to \FUNC{shmem\_session\_stop}). 
- If \VAR{total\_ops} is lower than the \textit{actual} number of calls to + If \VAR{total\_ops} differs from the \textit{actual} number of calls to \openshmem RMA routines within the session, then application performance might be suboptimal; however, the result of any data transfers, completions, or memory ordering operations are unaffected by the value of From 3fc9bf3f0defdf9796caec4b86ae94116442b68d Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 9 May 2024 11:26:08 -0400 Subject: [PATCH 28/30] sessions: improve writing, fix example, SAME_AMO --- content/sessions_intro.tex | 6 ++++- content/shmem_session_config_t.tex | 17 ++++++------- content/shmem_session_start.tex | 37 ++++++++++++++++++---------- content/shmem_session_stop.tex | 2 +- content/shmem_team_config_t.tex | 4 +-- example_code/shmem_session_example.c | 8 +++--- utils/defs.tex | 2 +- 7 files changed, 46 insertions(+), 30 deletions(-) diff --git a/content/sessions_intro.tex b/content/sessions_intro.tex index 73ae017e..0e1aebdc 100644 --- a/content/sessions_intro.tex +++ b/content/sessions_intro.tex @@ -10,6 +10,10 @@ which patterns of \openshmem RMA and AMO routines will occur within a session. These options serve only as \textit{hints} to the library; it is up to the implementation whether or not to apply any optimizations within a session. +A session may be provided a configuration argument that specifies attributes +assosiated with the session. This configuration argument is of type +\CTYPE{shmem\_session\_config\_t}, which is detailed further in +Section~\ref{subsec:shmem_session_config_t}. Usage of the \openshmem session APIs on a particular context must comply with the requirements of all options set on that context. @@ -22,5 +26,5 @@ routines not performed on a communication context (like collective routines) are ineligible for session hints.
-The \FUNC{shmem\_config\_t} object requires the \CONST{SIZE\_MAX} macro +The \FUNC{shmem\_session\_config\_t} object requires the \CONST{SIZE\_MAX} macro defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.3 and \Cstd[11]~\S7.20.3. diff --git a/content/shmem_session_config_t.tex b/content/shmem_session_config_t.tex index 5982da7b..5472fd1b 100644 --- a/content/shmem_session_config_t.tex +++ b/content/shmem_session_config_t.tex @@ -34,9 +34,9 @@ completions, or memory ordering operations are unaffected by the value of \FUNC{total\_ops}. - When using the configuration structure to start a communication session, a - mask parameter controls which fields are accessed by the \openshmem - library. + When passing a configuration structure to \FUNC{shmem\_session\_start}, the + mask parameter specifies which fields the application requests to associate + with the session. Any configuration parameter value that is not indicated in the mask will be ignored, and the default value will be used instead. Therefore, a program must set only the fields for which it does not want @@ -56,17 +56,16 @@ The default values for configuration parameters are: \apitablerow{\VAR{total\_ops} = \CONST{SIZE\_MAX}}{ - By default, the number of expected maximum number of calls to \openshmem - RMA routines is set to the upper bound of a \VAR{size\_t} variable, - \VAR{SIZE\_MAX}. This is a representative constant that indicates the - \openshmem library is free to select any value appropriate for the - implementation. + By default, the expected maximum number of calls to \openshmem RMA routines + in the session is set to the maximum value of a \VAR{size\_t} variable, + \VAR{SIZE\_MAX}. This default setting indicates that the \openshmem + application chooses not to specify a value for \VAR{total\_ops}. 
} } \apinotes{ Users are discouraged from calling \FUNC{shmem\_fence}, - \FUNC{shmem\_cxt\_fence}, \FUNC{shmem\_quiet}, or \FUNC{shmem\_ctx\_quiet} + \FUNC{shmem\_ctx\_fence}, \FUNC{shmem\_quiet}, or \FUNC{shmem\_ctx\_quiet} routines within a session whenever possible, because the library must impose strict completions to comply with ordering semantics. However, hints provided by \FUNC{shmem\_session\_config\_t} do not imply diff --git a/content/shmem_session_start.tex b/content/shmem_session_start.tex index 838cb19b..c2286ffe 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_session_start.tex @@ -91,25 +91,36 @@ memory ordering routines might require the library to enforce completions, reducing the potential benefit of batching. - The \VAR{total\_ops} field of \VAR{config} with mask value - \LibConstRef{SHMEM\_SESSION\_TOTAL\_OPS} indicates the expected maximum + The \VAR{total\_ops} field of \VAR{config} indicates the expected maximum number of calls to \openshmem RMA routines within the session. See Section~\ref{subsec:shmem_session_config_t} for details about \VAR{shmem\_session\_config\_t} parameters. } \hline \sessiontablerow{\LibConstRef{SHMEM\_SESSION\_SAME\_AMO}}{ - The session will contain a batch (as defined by the - \VAR{SHMEM\_SESSION\_BATCH} option) of only AMOs that will not occur - concurrently across any different signal operators - (i.e.~\ref{subsec:signal_operator}), operations (\ref{sec:amo}), or types - (Tables \ref{stdamotypes} and \ref{extamotypes}). - - The same members of the \VAR{config} structure that apply to the - \LibConstRef{SHMEM\_SESSION\_BATCH} option also apply to the - \LibConstRef{SHMEM\_SESSION\_SAME\_AMO} option. See - Section~\ref{subsec:shmem_session_config_t} for details about these - parameters. 
+ The \VAR{SHMEM\_SESSION\_SAME\_AMO} hint indicates the session will contain + a series of calls to AMO and/or signaling routines that do not differ in + their signal operators (see Section \ref{subsec:signal_operator}), atomic + operations (see Section \ref{sec:amo}), or datatypes (see + Tables~\ref{stdamotypes} and \ref{extamotypes}). + For example, this hint would apply to a session that includes \textit{only} + calls to \FUNC{shmem\_int\_atomic\_inc}. + However, this hint would not apply to a session that includes both calls to + \FUNC{shmem\_int\_atomic\_inc} and \FUNC{shmem\_int\_atomic\_fetch}, + because the operation \textit{fetch} differs from \textit{increment}. + (Similarly, this hint would not apply to a session that includes both calls to + \FUNC{shmem\_int\_atomic\_inc} and \FUNC{shmem\_long\_atomic\_inc}, + because the datatype \textit{long} differs from \textit{int}.) + The \VAR{SHMEM\_SESSION\_SAME\_AMO} hint is applicable to sessions that + exclusively use \textit{either} the \VAR{SHMEM\_SIGNAL\_SET} or the + \VAR{SHMEM\_SIGNAL\_ADD} operators in signaling operations, but not both. + This hint does not restrict the application from calling other (non-atomic) + RMA routines within the session. + + The \VAR{total\_ops} field of \VAR{config} indicates the expected maximum + number of calls to \openshmem RMA routines within the session. + See Section~\ref{subsec:shmem_session_config_t} for details about + \VAR{shmem\_session\_config\_t} parameters. 
} \hline \sessiontableend diff --git a/content/shmem_session_stop.tex b/content/shmem_session_stop.tex index f39cb784..9ace13b5 100644 --- a/content/shmem_session_stop.tex +++ b/content/shmem_session_stop.tex @@ -33,7 +33,7 @@ \begin{apiexamples} \apicexample - {The following \Cstd[11] program demonstrates the usage of + {The following \CorCpp{} program demonstrates the usage of \FUNC{shmem\_session\_start} and \FUNC{shmem\_session\_stop} with a loop of random atomic non-fetching XOR updates to a distributed table, similar to the HPC Challenge RandomAccess GUPS (Giga-updates per second) benchmark diff --git a/content/shmem_team_config_t.tex b/content/shmem_team_config_t.tex index a82ce2c2..dd2ad01b 100644 --- a/content/shmem_team_config_t.tex +++ b/content/shmem_team_config_t.tex @@ -32,8 +32,8 @@ See Section~\ref{sec:ctx} for more on communication contexts and Section~\ref{subsec:shmem_team_create_ctx} for team-based context creation. - When using the configuration structure to create teams, a mask parameter - controls which fields may be accessed by the \openshmem library. + When passing a configuration structure to a team creation routine, the mask parameter + specifies which fields the application requests to associate with the new team. Any configuration parameter value that is not indicated in the mask will be ignored, and the default value will be used instead. Therefore, a program must set only the fields for which it does not want the default value. 
diff --git a/example_code/shmem_session_example.c b/example_code/shmem_session_example.c index 045d0ba0..93e4414f 100644 --- a/example_code/shmem_session_example.c +++ b/example_code/shmem_session_example.c @@ -1,6 +1,7 @@ #include #include #include +#include #define N_UPDATES (1lu << 18) #define N_INDICES (1lu << 10) @@ -23,18 +24,19 @@ int main(void) { shmem_global_exit(1); } - shmem_config_t config; + shmem_session_config_t config; long config_mask; config.total_ops = N_UPDATES; config_mask = SHMEM_SESSION_TOTAL_OPS; + long options = SHMEM_SESSION_BATCH | SHMEM_SESSION_SAME_AMO; - shmem_session_start(ctx, SHMEM_SESSION_SAME_AMO, config, config_mask); + shmem_session_start(ctx, options, &config, config_mask); for (size_t i = 0; i < N_UPDATES; i++) { int random_pe = rand() % npes; size_t random_idx = rand() % N_INDICES; uint64_t random_val = rand() % N_VALUES; - shmem_uint64_atomic_xor(ctx, &table[random_idx], random_val, random_pe); + shmem_ctx_uint64_atomic_xor(ctx, &table[random_idx], random_val, random_pe); } shmem_session_stop(ctx); diff --git a/utils/defs.tex b/utils/defs.tex index b429e663..b21649f2 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -373,7 +373,7 @@ } \newcommand{\sessiontablebegin} { -\begin{table}[h] +\begin{table}[h!] 
\hspace{-0.5cm} \begin{tabular}{|p{4.8cm}|p{12cm}|} \hline From 26f3cc04cac8583e3917e625e8ad800f7528ccb7 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 9 May 2024 16:57:16 -0400 Subject: [PATCH 29/30] sessions: rename entire API to shmem_ctx_session_* --- content/sessions_intro.tex | 15 +++---- ...g_t.tex => shmem_ctx_session_config_t.tex} | 24 +++++------ ..._start.tex => shmem_ctx_session_start.tex} | 42 +++++++++---------- ...on_stop.tex => shmem_ctx_session_stop.tex} | 12 +++--- ..._example.c => shmem_ctx_session_example.c} | 14 +++---- main_spec.tex | 12 +++--- utils/defs.tex | 10 ++++- 7 files changed, 68 insertions(+), 61 deletions(-) rename content/{shmem_session_config_t.tex => shmem_ctx_session_config_t.tex} (78%) rename content/{shmem_session_start.tex => shmem_ctx_session_start.tex} (77%) rename content/{shmem_session_stop.tex => shmem_ctx_session_stop.tex} (69%) rename example_code/{shmem_session_example.c => shmem_ctx_session_example.c} (70%) diff --git a/content/sessions_intro.tex b/content/sessions_intro.tex index 0e1aebdc..8e4d4ab8 100644 --- a/content/sessions_intro.tex +++ b/content/sessions_intro.tex @@ -4,15 +4,15 @@ A session is associated with a specific \openshmem communication context (Section~\ref{sec:ctx}), and it indicates the beginning and ending of communication phases on that context. -The \FUNC{shmem\_session\_start} routine indicates the beginning of a session, -and the \FUNC{shmem\_session\_stop} routine indicates the end of a session. -The \LibConstRef{SHMEM\_SESSION\_*} options (Table~\ref{session_opts}) indicate +The \FUNC{shmem\_ctx\_session\_start} routine indicates the beginning of a session, +and the \FUNC{shmem\_ctx\_session\_stop} routine indicates the end of a session. +The \LibConstRef{SHMEM\_CTX\_SESSION\_*} options (Table~\ref{session_opts}) indicate which patterns of \openshmem RMA and AMO routines will occur within a session. 
These options serve only as \textit{hints} to the library; it is up to the implementation whether or not to apply any optimizations within a session. A session may be provided a configuration argument that specifies attributes -assosiated with the session. This configuration argument is of type -\CTYPE{shmem\_session\_config\_t}, which is detailed further in +associated with the session. This configuration argument is of type +\CTYPE{shmem\_ctx\_session\_config\_t}, which is detailed further in Section~\ref{subsec:shmem_team_config_t}. Usage of the \openshmem session APIs on a particular context must comply with @@ -26,5 +26,6 @@ routines not performed on a communication context (like collective routines) are ineligible for session hints. -The \FUNC{shmem\_session\_config\_t} object requires the \CONST{SIZE\_MAX} macro -defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.3 and \Cstd[11]~\S7.20.3. +The \FUNC{shmem\_ctx\_session\_config\_t} object requires the \CONST{SIZE\_MAX} +macro defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.3 and +\Cstd[11]~\S7.20.3. diff --git a/content/shmem_session_config_t.tex b/content/shmem_ctx_session_config_t.tex similarity index 78% rename from content/shmem_session_config_t.tex rename to content/shmem_ctx_session_config_t.tex index 5472fd1b..11adff1f 100644 --- a/content/shmem_session_config_t.tex +++ b/content/shmem_ctx_session_config_t.tex @@ -7,7 +7,7 @@ \begin{Csynopsis} typedef struct { size_t total_ops; -} shmem_session_config_t; +} shmem_ctx_session_config_t; \end{Csynopsis} \begin{apiarguments} @@ -17,8 +17,8 @@ \apidescription{ A communication session configuration object is provided as an argument to - the \FUNC{shmem\_session\_start} routine. - The \VAR{shmem\_session\_config\_t} object contains optional parameters + the \FUNC{shmem\_ctx\_session\_start} routine. + The \VAR{shmem\_ctx\_session\_config\_t} object contains optional parameters that are associated with the options of a communication session. 
These parameters serve only as \textit{hints} to the library; it is up to the implementation whether or not to use the parameter values within @@ -26,17 +26,17 @@ The \VAR{total\_ops} member indicates the expected maximum number of all calls to \openshmem RMA routines within the session (i.e., after a call to - \FUNC{shmem\_session\_start} and before a corresponding call to - \FUNC{shmem\_session\_stop}). + \FUNC{shmem\_ctx\_session\_start} and before a corresponding call to + \FUNC{shmem\_ctx\_session\_stop}). If \VAR{total\_ops} differs from the \textit{actual} number of calls to \openshmem RMA routines within the session, then application performance might be suboptimal; however, the result of any data transfers, completions, or memory ordering operations are unaffected by the value of \FUNC{total\_ops}. - When passing a configuration structure to \FUNC{shmem\_session\_start}, the - mask parameter specifies which fields the application requests to associate - with the session. + When passing a configuration structure to \FUNC{shmem\_ctx\_session\_start}, + the mask parameter specifies which fields the application requests to + associate with the session. Any configuration parameter value that is not indicated in the mask will be ignored, and the default value will be used instead. Therefore, a program must set only the fields for which it does not want @@ -48,14 +48,14 @@ should be started with the default values for all configuration parameters. - \apitablerow{\LibConstRef{SHMEM\_SESSION\_TOTAL\_OPS}}{ + \widetablerow{\LibConstRef{SHMEM\_CTX\_SESSION\_TOTAL\_OPS}}{ The value of the \VAR{total\_ops} member of the \VAR{config} structure is unmasked within the session and applied as a hint. 
} The default values for configuration parameters are: - \apitablerow{\VAR{total\_ops} = \CONST{SIZE\_MAX}}{ + \widetablerow{\VAR{total\_ops} = \CONST{SIZE\_MAX}}{ By default, the expected maximum number of calls to \openshmem RMA routines in the session is set to the maximum value of a \VAR{size\_t} variable, \VAR{SIZE\_MAX}. This default setting indicates that the \openshmem @@ -68,12 +68,12 @@ \FUNC{shmem\_ctx\_fence}, \FUNC{shmem\_quiet}, or \FUNC{shmem\_ctx\_quiet} routines within a session whenever possible, because the library must impose strict completions to comply with ordering semantics. - However, hints provided by \FUNC{shmem\_session\_config\_t} do not imply + However, hints provided by \FUNC{shmem\_ctx\_session\_config\_t} do not imply the occurrence of any completion or memory ordering operations. The requirements on buffers provided to \openshmem routines that are \textit{in-use} (as described in Section \ref{subsec:invoking_openshmem_operations}) apply regardless of any - \FUNC{shmem\_session\_config\_t} hints. + \FUNC{shmem\_ctx\_session\_config\_t} hints.
} \end{apidefinition} diff --git a/content/shmem_session_start.tex b/content/shmem_ctx_session_start.tex similarity index 77% rename from content/shmem_session_start.tex rename to content/shmem_ctx_session_start.tex index c2286ffe..9f7c6bb4 100644 --- a/content/shmem_session_start.tex +++ b/content/shmem_ctx_session_start.tex @@ -5,7 +5,7 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_session\_start}@(shmem_ctx_t ctx, long options, const shmem_session_config_t *config, long config_mask); +void @\FuncDecl{shmem\_ctx\_session\_start}@(shmem_ctx_t ctx, long options, const shmem_ctx_session_config_t *config, long config_mask); \end{Csynopsis} \begin{apiarguments} @@ -23,34 +23,34 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_session\_start} is a non-collective routine that begins a + \FUNC{shmem\_ctx\_session\_start} is a non-collective routine that begins a session on communication context \VAR{ctx} with hints requested via \VAR{options}. Sessions on a communication context must be stopped with a call to - \FUNC{shmem\_session\_stop} on the same context. + \FUNC{shmem\_ctx\_session\_stop} on the same context. If a session is already started on a given context, another call to - \FUNC{shmem\_session\_start} on that same context combines new options + \FUNC{shmem\_ctx\_session\_start} on that same context combines new options via a bitwise OR operation. In such a case, unmasked member values in the \VAR{config} argument replace any existing configuration values that are already applied to the session. If \VAR{ctx} compares equal to \LibConstRef{SHMEM\_CTX\_INVALID} then - \FUNC{shmem\_session\_start} performs no action and returns immediately. + \FUNC{shmem\_ctx\_session\_start} performs no action and returns immediately. 
- No combination of \VAR{options} passed to \FUNC{shmem\_session\_start} + No combination of \VAR{options} passed to \FUNC{shmem\_ctx\_session\_start} results in undefined behavior, but some combinations may be detrimental for performance; for example, when selecting an option that is not applicable to the session. It is the user's responsibility to determine which combination of \VAR{options} benefits the performance of the session. The \VAR{config} argument specifies session configuration parameters, - which are described in Section~\ref{subsec:shmem_session_config_t}. + which are described in Section~\ref{subsec:shmem_ctx_session_config_t}. The \VAR{config\_mask} argument is a bitwise mask representing the set of configuration parameters to use from \VAR{config}. A \VAR{config\_mask} value of \CONST{0} indicates that the session should be started with the default values for all configuration parameters. - See Section~\ref{subsec:shmem_session_config_t} for field mask names and + See Section~\ref{subsec:shmem_ctx_session_config_t} for field mask names and default configuration parameters. } @@ -60,11 +60,11 @@ \sessiontablebegin -\sessiontablerow{\LibConstRef{SHMEM\_SESSION\_BATCH}}{ +\sessiontablerow{\LibConstRef{SHMEM\_CTX\_SESSION\_BATCH}}{ A \textit{batch} is a series of calls to \openshmem routines that occur within a session on a communication context (i.e., after a call to - \FUNC{shmem\_session\_start} and before a corresponding call to - \FUNC{shmem\_session\_stop}), that might tolerate an increase in + \FUNC{shmem\_ctx\_session\_start} and before a corresponding call to + \FUNC{shmem\_ctx\_session\_stop}), that might tolerate an increase in individual call latencies. Designating a batch may provide an opportunity to decrease the overall overhead typically involved with the \openshmem library implementing the series as individual RMA operations. In other @@ -75,7 +75,7 @@ routines. 
The specific mechanisms for improving performance using batching optimizations depend on the \openshmem library implementation. - The \VAR{SHMEM\_SESSION\_BATCH} hint indicates that a communication + The \VAR{SHMEM\_CTX\_SESSION\_BATCH} hint indicates that a communication context will be used to issue a batch. An example of a batch is an iterative loop of non-blocking RMA and/or AMO routines. A batch may include a memory ordering or collective operation, but such routines @@ -93,12 +93,12 @@ The \VAR{total\_ops} field of \VAR{config} indicates the expected maximum number of calls to \openshmem RMA routines within the session. - See Section~\ref{subsec:shmem_session_config_t} for details - about \VAR{shmem\_session\_config\_t} parameters. + See Section~\ref{subsec:shmem_ctx_session_config_t} for details + about \VAR{shmem\_ctx\_session\_config\_t} parameters. } \hline -\sessiontablerow{\LibConstRef{SHMEM\_SESSION\_SAME\_AMO}}{ - The \VAR{SHMEM\_SESSION\_SAME\_AMO} hint indicates the session will contain +\sessiontablerow{\LibConstRef{SHMEM\_CTX\_SESSION\_SAME\_AMO}}{ + The \VAR{SHMEM\_CTX\_SESSION\_SAME\_AMO} hint indicates the session will contain a series of calls to AMO and/or signaling routines that do not differ in their signal operators (see Section \ref{subsec:signal_operator}), atomic operations (see Section \ref{sec:amo}), or datatypes (see @@ -111,7 +111,7 @@ (Similarly, this hint would not apply to a session that includes both calls to \FUNC{shmem\_int\_atomic\_inc} and \FUNC{shmem\_long\_atomic\_inc}, because the datatype \textit{long} differs from \textit{int}.) - The \VAR{SHMEM\_SESSION\_SAME\_AMO} hint is applicable to sessions that + The \VAR{SHMEM\_CTX\_SESSION\_SAME\_AMO} hint is applicable to sessions that exclusively use \textit{either} the \VAR{SHMEM\_SIGNAL\_SET} or the \VAR{SHMEM\_SIGNAL\_ADD} operators in signaling operations, but not both. 
This hint does not restrict the application from calling other (non-atomic) @@ -119,17 +119,17 @@ The \VAR{total\_ops} field of \VAR{config} indicates the expected maximum number of calls to \openshmem RMA routines within the session. - See Section~\ref{subsec:shmem_session_config_t} for details about - \VAR{shmem\_session\_config\_t} parameters. + See Section~\ref{subsec:shmem_ctx_session_config_t} for details about + \VAR{shmem\_ctx\_session\_config\_t} parameters. } \hline \sessiontableend \apinotes{ - The \FUNC{shmem\_session\_start} routine provides hints for improving + The \FUNC{shmem\_ctx\_session\_start} routine provides hints for improving performance, and \openshmem implementations are not required to apply any optimization. - \FUNC{shmem\_session\_start} is non-collective, so there is no implied + \FUNC{shmem\_ctx\_session\_start} is non-collective, so there is no implied synchronization. Blocking puts must be sufficiently small to benefit from batching, and the exact threshold for this benefit depends on the \openshmem implementation diff --git a/content/shmem_session_stop.tex b/content/shmem_ctx_session_stop.tex similarity index 69% rename from content/shmem_session_stop.tex rename to content/shmem_ctx_session_stop.tex index 9ace13b5..fc45fda8 100644 --- a/content/shmem_session_stop.tex +++ b/content/shmem_ctx_session_stop.tex @@ -5,7 +5,7 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_session\_stop}@(shmem_ctx_t ctx); +void @\FuncDecl{shmem\_ctx\_session\_stop}@(shmem_ctx_t ctx); \end{Csynopsis} \begin{apiarguments} @@ -14,9 +14,9 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_session\_stop} routine ends a session on context \VAR{ctx}. + The \FUNC{shmem\_ctx\_session\_stop} routine ends a session on context \VAR{ctx}. If a session is already stopped on a given communication context, another - call to \FUNC{shmem\_session\_stop} on that context has no effect.
+ call to \FUNC{shmem\_ctx\_session\_stop} on that context has no effect. } \apireturnvalues{ @@ -26,7 +26,7 @@ \apinotes{ Users are discouraged from including non-\openshmem code, such as a long computation loop, within a session without first calling - \FUNC{shmem\_session\_stop}. + \FUNC{shmem\_ctx\_session\_stop}. } @@ -34,11 +34,11 @@ \apicexample {The following \CorCpp{} program demonstrates the usage of - \FUNC{shmem\_session\_start} and \FUNC{shmem\_session\_stop} with a loop of + \FUNC{shmem\_ctx\_session\_start} and \FUNC{shmem\_ctx\_session\_stop} with a loop of random atomic non-fetching XOR updates to a distributed table, similar to the HPC Challenge RandomAccess GUPS (Giga-updates per second) benchmark \footnote{http://icl.cs.utk.edu/projectsfiles/hpcc/RandomAccess/}.} - {./example_code/shmem_session_example.c} + {./example_code/shmem_ctx_session_example.c} {} \end{apiexamples} diff --git a/example_code/shmem_session_example.c b/example_code/shmem_ctx_session_example.c similarity index 70% rename from example_code/shmem_session_example.c rename to example_code/shmem_ctx_session_example.c index 93e4414f..45f036ff 100644 --- a/example_code/shmem_session_example.c +++ b/example_code/shmem_ctx_session_example.c @@ -24,13 +24,13 @@ int main(void) { shmem_global_exit(1); } - shmem_session_config_t config; + shmem_ctx_session_config_t config; long config_mask; config.total_ops = N_UPDATES; - config_mask = SHMEM_SESSION_TOTAL_OPS; - long options = SHMEM_SESSION_BATCH | SHMEM_SESSION_SAME_AMO; + config_mask = SHMEM_CTX_SESSION_TOTAL_OPS; + long options = SHMEM_CTX_SESSION_BATCH | SHMEM_CTX_SESSION_SAME_AMO; - shmem_session_start(ctx, options, &config, config_mask); + shmem_ctx_session_start(ctx, options, &config, config_mask); for (size_t i = 0; i < N_UPDATES; i++) { int random_pe = rand() % npes; @@ -39,9 +39,9 @@ int main(void) { shmem_ctx_uint64_atomic_xor(ctx, &table[random_idx], random_val, random_pe); } - shmem_session_stop(ctx); - shmem_ctx_quiet(ctx); 
/* shmem_session_stop() does not quiet the context. */ - shmem_sync_all(); /* shmem_session_stop() does not synchronize. */ + shmem_ctx_session_stop(ctx); + shmem_ctx_quiet(ctx); /* shmem_ctx_session_stop() does not quiet the context. */ + shmem_sync_all(); /* shmem_ctx_session_stop() does not synchronize. */ /* At this point, it is safe to check and/or validate the table result... */ diff --git a/main_spec.tex b/main_spec.tex index c81de0cd..2eb31c02 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -360,14 +360,14 @@ \subsubsection{\textbf{SHMEM\_SIGNAL\_FETCH}}\label{subsec:shmem_signal_fetch} \subsection{Session Routines}\label{subsec:sessions} \input{content/sessions_intro.tex} -\subsubsection{\textbf{SHMEM\_SESSION\_CONFIG\_T}}\label{subsec:shmem_session_config_t} -\input{content/shmem_session_config_t.tex} +\subsubsection{\textbf{SHMEM\_CTX\_SESSION\_CONFIG\_T}}\label{subsec:shmem_ctx_session_config_t} +\input{content/shmem_ctx_session_config_t.tex} -\subsubsection{\textbf{SHMEM\_SESSION\_START}}\label{subsec:shmem_session_start} -\input{content/shmem_session_start.tex} +\subsubsection{\textbf{SHMEM\_CTX\_SESSION\_START}}\label{subsec:shmem_ctx_session_start} +\input{content/shmem_ctx_session_start.tex} -\subsubsection{\textbf{SHMEM\_SESSION\_STOP}}\label{subsec:shmem_session_stop} -\input{content/shmem_session_stop.tex} +\subsubsection{\textbf{SHMEM\_CTX\_SESSION\_STOP}}\label{subsec:shmem_ctx_session_stop} +\input{content/shmem_ctx_session_stop.tex} \subsection{Collective Routines}\label{subsec:coll} diff --git a/utils/defs.tex b/utils/defs.tex index b21649f2..771ba8a7 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -372,10 +372,16 @@ \end{tabular}\\ } +\newcommand{\widetablerow}[2]{ + \begin{tabular}{p{6cm} p{8cm}} + #1 & #2 \tabularnewline + \end{tabular}\\ +} + \newcommand{\sessiontablebegin} { \begin{table}[h!] 
-\hspace{-0.5cm} -\begin{tabular}{|p{4.8cm}|p{12cm}|} +\hspace{-1.0cm} +\begin{tabular}{|p{5.6cm}|p{12cm}|} \hline \textbf{Option} & \textbf{Usage hint} \tabularnewline \hline From e22a3f1503d9ff97487888900522c86a67a27342 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 9 May 2024 17:06:33 -0400 Subject: [PATCH 30/30] sessions: remove SESSION_SAME_AMO hint for now... --- content/shmem_ctx_session_start.tex | 26 ------------------------ example_code/shmem_ctx_session_example.c | 3 +-- 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/content/shmem_ctx_session_start.tex b/content/shmem_ctx_session_start.tex index 9f7c6bb4..7c771d24 100644 --- a/content/shmem_ctx_session_start.tex +++ b/content/shmem_ctx_session_start.tex @@ -97,32 +97,6 @@ about \VAR{shmem\_ctx\_session\_config\_t} parameters. } \hline -\sessiontablerow{\LibConstRef{SHMEM\_CTX\_SESSION\_SAME\_AMO}}{ - The \VAR{SHMEM\_CTX\_SESSION\_SAME\_AMO} hint indicates the session will contain - a series of calls to AMO and/or signaling routines that do not differ in - their signal operators (see Section \ref{subsec:signal_operator}), atomic - operations (see Section \ref{sec:amo}), or datatypes (see - Tables~\ref{stdamotypes} and \ref{extamotypes}). - For example, this hint would apply to a session that includes \textit{only} - calls to \FUNC{shmem\_int\_atomic\_inc}. - However, this hint would not apply to a session that includes both calls to - \FUNC{shmem\_int\_atomic\_inc} and \FUNC{shmem\_int\_atomic\_fetch}, - because the operation \textit{fetch} differs from \textit{increment}. - (Similarly, this hint would not apply to a session that includes both calls to - \FUNC{shmem\_int\_atomic\_inc} and \FUNC{shmem\_long\_atomic\_inc}, - because the datatype \textit{long} differs from \textit{int}.) 
- The \VAR{SHMEM\_CTX\_SESSION\_SAME\_AMO} hint is applicable to sessions that - exclusively use \textit{either} the \VAR{SHMEM\_SIGNAL\_SET} or the - \VAR{SHMEM\_SIGNAL\_ADD} operators in signaling operations, but not both. - This hint does not restrict the application from calling other (non-atomic) - RMA routines within the session. - - The \VAR{total\_ops} field of \VAR{config} indicates the expected maximum - number of calls to \openshmem RMA routines within the session. - See Section~\ref{subsec:shmem_ctx_session_config_t} for details about - \VAR{shmem\_ctx\_session\_config\_t} parameters. - } \hline - \sessiontableend \apinotes{ diff --git a/example_code/shmem_ctx_session_example.c b/example_code/shmem_ctx_session_example.c index 45f036ff..8c96f49f 100644 --- a/example_code/shmem_ctx_session_example.c +++ b/example_code/shmem_ctx_session_example.c @@ -28,9 +28,8 @@ int main(void) { long config_mask; config.total_ops = N_UPDATES; config_mask = SHMEM_CTX_SESSION_TOTAL_OPS; - long options = SHMEM_CTX_SESSION_BATCH | SHMEM_CTX_SESSION_SAME_AMO; - shmem_ctx_session_start(ctx, options, &config, config_mask); + shmem_ctx_session_start(ctx, SHMEM_CTX_SESSION_BATCH, &config, config_mask); for (size_t i = 0; i < N_UPDATES; i++) { int random_pe = rand() % npes;