Skip to content

Commit

Permalink
[SYCL][Docs] Allow copy-construction of device_global (intel#15075)
Browse files Browse the repository at this point in the history
This commit makes it possible to copy-construct device_global variables
if they do not have the device_image_scope property. The restriction on
device_image_scope is due to static construction not being allowed in
device code, which they would require, while other device_globals have
USM storage which will be initialized by the host code, so the
constructor on the device is a simple zero-initialization.

---------

Signed-off-by: Larsen, Steffen <[email protected]>
Co-authored-by: John Pennycook <[email protected]>
  • Loading branch information
steffenlarsen and Pennycook authored Oct 31, 2024
1 parent 5d5ec9e commit e6e45d0
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ public:
// device_global initializes underlying T with the args argument
#if __cpp_consteval
// Available only if sizeof...(Args) > 1 or the one argument in args is not a
// device_global.
template <typename... Args>
consteval explicit device_global(Args&&... args);
#else
Expand All @@ -244,7 +246,14 @@ public:
device_global() = default;
#endif // __cpp_consteval
device_global(const device_global &) = delete;
// Available if PropertyListT::has_property<device_image_scope_key>() is false.
constexpr device_global(const device_global &other);
// Available if PropertyListT::has_property<device_image_scope_key>() is false
// and OtherT is convertible to T.
template <typename OtherT, typename OtherProps>
constexpr device_global(const device_global<OtherT, OtherProps> &other);
device_global(const device_global &&) = delete;
device_global &operator=(const device_global &) = delete;
device_global &operator=(const device_global &&) = delete;
Expand Down Expand Up @@ -318,12 +327,42 @@ template <typename... Args>
consteval explicit device_global(Args&&... args);
----
|
Available only if sizeof...(Args) != 1 or the one argument in args is not a device_global.

Constructs a `device_global` object, and implicit storage for `T` in the global address space on each device that may access it.

The object of type `T` is initialized from the `args` parameter pack using list initialization as defined in the {cpp} specification.

`T` must be trivially destructible.

// --- ROW BREAK ---
a|
[source,c++]
----
constexpr device_global(const device_global &other);
----
|
Available if `PropertyListT::has_property<device_image_scope_key>() == false`.

Constructs a `device_global` object, and implicit storage for `T` in the global address space on each device that may access it.

The storage on each device for `T` is initialized with a copy of the initial value of `other`. The behavior is undefined if `other` has been written to prior to a call to this constructor.

// --- ROW BREAK ---
a|
[source,c++]
----
template <typename OtherT, typename OtherProps>
constexpr device_global(const device_global<OtherT, OtherProps> &other);
----
|
Available if `PropertyListT::has_property<device_image_scope_key>() == false` and
`std::is_convertible_v<OtherT, T> == true`;

Constructs a `device_global` object, and implicit storage for `T` in the global address space on each device that may access it.

The storage on each device for `T` is initialized with a copy of the initial value of `other`. The behavior is undefined if `other` has been written to prior to a call to this constructor.

// --- ROW BREAK ---
a|
[source,c++]
Expand Down
78 changes: 73 additions & 5 deletions sycl/include/sycl/ext/oneapi/device_global/device_global.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

template <typename T, typename PropertyListT> class device_global;

namespace detail {
// Type-trait for checking if a type defines `operator->`.
template <typename T, typename = void>
Expand All @@ -49,6 +51,20 @@ struct HasArrowOperator<T,
std::void_t<decltype(std::declval<T>().operator->())>>
: std::true_type {};

template <typename T, typename PropertyListT, typename>
class device_global_base;

// Checks that T is a reference to either device_global or
// device_global_base. This is used by the variadic ctor to allow copy ctors to
// take preference.
template <typename T> struct IsDeviceGlobalOrBaseRef : std::false_type {};
template <typename T, typename PropertyListT>
struct IsDeviceGlobalOrBaseRef<device_global_base<T, PropertyListT, void> &>
: std::true_type {};
template <typename T, typename PropertyListT>
struct IsDeviceGlobalOrBaseRef<device_global<T, PropertyListT> &>
: std::true_type {};

// Base class for device_global.
template <typename T, typename PropertyListT, typename = void>
class device_global_base {
Expand All @@ -63,14 +79,49 @@ class device_global_base {
pointer_t get_ptr() noexcept { return usmptr; }
pointer_t get_ptr() const noexcept { return usmptr; }

template <typename, typename, typename> friend class device_global_base;

#ifndef __SYCL_DEVICE_ONLY__
template <typename OtherT, typename OtherProps>
static constexpr const T &
ExtractInitialVal(const device_global_base<OtherT, OtherProps> &Other) {
if constexpr (OtherProps::template has_property<device_image_scope_key>())
return Other.val;
else
return Other.init_val;
}
#endif // __SYCL_DEVICE_ONLY__

public:
#if __cpp_consteval
template <typename... Args>
// The SFINAE is to allow the copy constructors to take priority.
template <
typename... Args,
std::enable_if_t<
sizeof...(Args) != 1 ||
(!IsDeviceGlobalOrBaseRef<std::remove_cv_t<Args>>::value && ...),
int> = 0>
consteval explicit device_global_base(Args &&...args) : init_val{args...} {}
#else
device_global_base() = default;
#endif // __cpp_consteval

#ifndef __SYCL_DEVICE_ONLY__
template <typename OtherT, typename OtherProps,
typename = std::enable_if_t<std::is_convertible_v<OtherT, T>>>
constexpr device_global_base(
const device_global_base<OtherT, OtherProps> &DGB)
: init_val{ExtractInitialVal(DGB)} {}
constexpr device_global_base(const device_global_base &DGB)
: init_val{DGB.init_val} {}
#else
template <typename OtherT, typename OtherProps,
typename = std::enable_if_t<std::is_convertible_v<OtherT, T>>>
constexpr device_global_base(const device_global_base<OtherT, OtherProps> &) {
}
constexpr device_global_base(const device_global_base &) {}
#endif // __SYCL_DEVICE_ONLY__

template <access::decorated IsDecorated>
multi_ptr<T, access::address_space::global_space, IsDecorated>
get_multi_ptr() noexcept {
Expand Down Expand Up @@ -100,14 +151,28 @@ class device_global_base<
T *get_ptr() noexcept { return &val; }
const T *get_ptr() const noexcept { return &val; }

template <typename, typename, typename> friend class device_global_base;

public:
#if __cpp_consteval
template <typename... Args>
// The SFINAE is to allow the copy constructors to take priority.
template <
typename... Args,
std::enable_if_t<
sizeof...(Args) != 1 ||
(!IsDeviceGlobalOrBaseRef<std::remove_cv_t<Args>>::value && ...),
int> = 0>
consteval explicit device_global_base(Args &&...args) : val{args...} {}
#else
device_global_base() = default;
#endif // __cpp_consteval

template <typename OtherT, typename OtherProps,
typename = std::enable_if_t<std::is_convertible_v<OtherT, T>>>
constexpr device_global_base(const device_global_base<OtherT, OtherProps> &) =
delete;
constexpr device_global_base(const device_global_base &) = delete;

template <access::decorated IsDecorated>
multi_ptr<T, access::address_space::global_space, IsDecorated>
get_multi_ptr() noexcept {
Expand All @@ -124,6 +189,7 @@ class device_global_base<
const T>(this->get_ptr());
}
};

} // namespace detail

template <typename T, typename PropertyListT = empty_properties_t>
Expand Down Expand Up @@ -151,6 +217,7 @@ class
: public detail::device_global_base<T, detail::properties_t<Props...>> {

using property_list_t = detail::properties_t<Props...>;
using base_t = detail::device_global_base<T, property_list_t>;

public:
using element_type = std::remove_extent_t<T>;
Expand All @@ -167,10 +234,11 @@ class
"Property list is invalid.");

// Inherit the base class' constructors
using detail::device_global_base<
T, detail::properties_t<Props...>>::device_global_base;
using detail::device_global_base<T, property_list_t>::device_global_base;

constexpr device_global(const device_global &DG)
: base_t(static_cast<const base_t &>(DG)) {}

device_global(const device_global &) = delete;
device_global(const device_global &&) = delete;
device_global &operator=(const device_global &) = delete;
device_global &operator=(const device_global &&) = delete;
Expand Down
68 changes: 68 additions & 0 deletions sycl/test-e2e/DeviceGlobal/device_global_copy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// RUN: %{build} -std=c++23 -o %t.out
// RUN: %{run} %t.out
//
// UNSUPPORTED: opencl && gpu
// UNSUPPORTED-TRACKER: GSD-4287
//
// Tests the copy ctor on device_global without device_image_scope.

#include <sycl/detail/core.hpp>

namespace oneapiext = sycl::ext::oneapi::experimental;

oneapiext::device_global<const int> DGInit1{3};
oneapiext::device_global<const int> DGCopy1{DGInit1};

oneapiext::device_global<int> DGInit2{4};
oneapiext::device_global<int> DGCopy2{DGInit2};

oneapiext::device_global<float> DGInit3{5.0f};
oneapiext::device_global<int> DGCopy3{DGInit3};

oneapiext::device_global<const int, decltype(oneapiext::properties{
oneapiext::device_image_scope})>
DGInit4{6};
oneapiext::device_global<const int> DGCopy4{DGInit4};

oneapiext::device_global<const int> DGInit5{7};
oneapiext::device_global<const int, decltype(oneapiext::properties{
oneapiext::host_access_read})>
DGCopy5{DGInit5};

int main() {
sycl::queue Q;

int ReadVals[10] = {0, 0};
{
sycl::buffer<int, 1> ReadValsBuff{ReadVals, 10};

Q.submit([&](sycl::handler &CGH) {
sycl::accessor ReadValsAcc{ReadValsBuff, CGH, sycl::write_only};
CGH.single_task([=]() {
ReadValsAcc[0] = DGInit1.get();
ReadValsAcc[1] = DGCopy1.get();
ReadValsAcc[2] = DGInit2.get();
ReadValsAcc[3] = DGCopy2.get();
ReadValsAcc[4] = DGInit3.get();
ReadValsAcc[5] = DGCopy3.get();
ReadValsAcc[6] = DGInit4.get();
ReadValsAcc[7] = DGCopy4.get();
ReadValsAcc[8] = DGInit5.get();
ReadValsAcc[9] = DGCopy5.get();
});
}).wait_and_throw();
}

assert(ReadVals[0] == 3);
assert(ReadVals[1] == 3);
assert(ReadVals[2] == 4);
assert(ReadVals[3] == 4);
assert(ReadVals[4] == 5);
assert(ReadVals[5] == 5);
assert(ReadVals[6] == 6);
assert(ReadVals[7] == 6);
assert(ReadVals[8] == 7);
assert(ReadVals[9] == 7);

return 0;
}
29 changes: 29 additions & 0 deletions sycl/test/extensions/device_global/device_global_copy_negative.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// RUN: %clangxx -std=c++23 -fsycl -fsycl-targets=%sycl_triple -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s
//
// Tests that the copy ctor on device_global with device_image_scope is
// unavailable.

#include <sycl/sycl.hpp>

namespace oneapiext = sycl::ext::oneapi::experimental;

using device_image_properties =
decltype(oneapiext::properties{oneapiext::device_image_scope});

// expected-error@sycl/ext/oneapi/device_global/device_global.hpp:* {{call to deleted constructor}}
oneapiext::device_global<const int, device_image_properties> DGInit1{3};
oneapiext::device_global<const int, device_image_properties> DGCopy1{DGInit1};

// expected-error@sycl/ext/oneapi/device_global/device_global.hpp:* {{call to deleted constructor}}
oneapiext::device_global<int, device_image_properties> DGInit2{3};
oneapiext::device_global<int, device_image_properties> DGCopy2{DGInit2};

// expected-error@+2 {{call to deleted constructor}}
oneapiext::device_global<int, device_image_properties> DGInit3{3};
oneapiext::device_global<float, device_image_properties> DGCopy3{DGInit3};

// expected-error@+2 {{call to deleted constructor}}
oneapiext::device_global<const int> DGInit4{3};
oneapiext::device_global<const int, device_image_properties> DGCopy4{DGInit4};

int main() { return 0; }

0 comments on commit e6e45d0

Please sign in to comment.