Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not expose Celerity internals in installed headers #308

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 45 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,15 +206,54 @@ set(CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS ON)
set(CELERITY_FEATURE_LOCAL_ACCESSOR ON)
set(CELERITY_FEATURE_UNNAMED_KERNELS ON)

# Add includes to library so they show up in IDEs
file(GLOB_RECURSE INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/include/*.h")
# Add header files to library so they show up in IDEs
file(GLOB_RECURSE ALL_INCLUDES
"${CMAKE_CURRENT_SOURCE_DIR}/include/*.h"
"${CMAKE_CURRENT_SOURCE_DIR}/src/*.h"
"${CMAKE_CURRENT_SOURCE_DIR}/src/*.inl")

if(CMAKE_GENERATOR STREQUAL "Ninja")
# Force colored warnings in Ninja's output, if the compiler has -fdiagnostics-color support.
# Rationale in https://github.com/ninja-build/ninja/issues/814
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
endif()

# TODO move all non-public headers from "include" to "src"
set(PUBLIC_HEADERS
include/access_modes.h
include/accessor.h
include/buffer.h
include/celerity.h
include/cgf_diagnostics.h
include/cgf.h
include/closure_hydrator.h
include/config.h
include/debug.h
include/device_selector.h
include/distr_queue.h
include/fence.h
include/grid.h
include/handler.h
include/hint.h
include/host_object.h
include/host_utils.h
include/item.h
include/log.h
include/partition.h
include/print_utils.h
include/queue.h
include/range_mapper.h
include/ranges.h
include/reduction.h
include/runtime.h
include/side_effect.h
include/sycl_wrappers.h
include/tracy.h
include/types.h
include/utils.h
include/workaround.h
)

set(SOURCES
src/affinity.cc
src/config.cc
Expand Down Expand Up @@ -268,13 +307,14 @@ elseif(CELERITY_SYCL_IMPL STREQUAL "SimSYCL")
endif()

configure_file(include/version.h.in include/version.h @ONLY)
list(APPEND INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/version.h")
list(APPEND ALL_INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/include/version.h")
list(APPEND PUBLIC_HEADERS "${CMAKE_CURRENT_BINARY_DIR}/include/version.h")

add_library(
celerity_runtime
STATIC
${SOURCES}
${INCLUDES}
${ALL_INCLUDES}
)

set_property(TARGET celerity_runtime PROPERTY CXX_STANDARD "${CELERITY_CXX_STANDARD}")
Expand Down Expand Up @@ -394,9 +434,8 @@ include(CMakePackageConfigHelpers)

# Install celerity
install(
DIRECTORY ${PROJECT_SOURCE_DIR}/include/
FILES ${PUBLIC_HEADERS}
DESTINATION include/celerity
PATTERN *.in EXCLUDE
)
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/
Expand Down
2 changes: 1 addition & 1 deletion ci/find-unformatted-files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if [[ ! -x "$(which clang-format)" ]]; then
exit 1
fi

SOURCES=$(find examples include src test \( -name "*.h" -o -name "*.cc" \) ! -name "stb*")
SOURCES=$(find examples include src test \( -name "*.h" -o -name "*.cc" -o -name "*.inl" \) ! -name "stb*")

for s in $SOURCES; do
# Since clang-format does not provide an option to check whether formatting is required,
Expand Down
4 changes: 2 additions & 2 deletions include/backend/backend.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#pragma once

#include "async_event.h"
#include "cgf.h"
#include "closure_hydrator.h"
#include "grid.h"
#include "launcher.h"
#include "nd_memory.h"
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
#include "types.h"
PeterTh marked this conversation as resolved.
Show resolved Hide resolved

Expand Down Expand Up @@ -58,7 +58,7 @@ class backend {
/// Enqueues the asynchronous execution of a host task in a background thread identified by `host_lane`. The operation will complete in-order with respect
/// to any other asynchronous host operation on `host_lane`.
virtual async_event enqueue_host_task(size_t host_lane, const host_task_launcher& launcher, std::vector<closure_hydrator::accessor_info> accessor_infos,
const box<3>& execution_range, const communicator* collective_comm) = 0;
const range<3>& global_range, const box<3>& execution_range, const communicator* collective_comm) = 0;

/// Enqueues the asynchronous execution of a kernel in an in-order device queue identified by `device` and `device_lane`. The operation will complete
/// in-order with respect to any other asynchronous device operation on `device` and `device_lane`.
Expand Down
6 changes: 3 additions & 3 deletions include/backend/sycl_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

#include "async_event.h"
#include "backend/backend.h"
#include "cgf.h"
#include "closure_hydrator.h"
#include "grid.h"
#include "launcher.h"
#include "nd_memory.h"
#include "types.h"

Expand Down Expand Up @@ -110,7 +110,7 @@ class sycl_backend : public backend {
async_event enqueue_device_free(device_id device, void* ptr) override;

async_event enqueue_host_task(size_t host_lane, const host_task_launcher& launcher, std::vector<closure_hydrator::accessor_info> accessor_infos,
const box<3>& execution_range, const communicator* collective_comm) override;
const range<3>& global_range, const box<3>& execution_range, const communicator* collective_comm) override;

async_event enqueue_device_kernel(device_id device, size_t device_lane, const device_kernel_launcher& launcher,
std::vector<closure_hydrator::accessor_info> accessor_infos, const box<3>& execution_range, const std::vector<void*>& reduction_ptrs) override;
Expand Down Expand Up @@ -158,7 +158,7 @@ class sycl_cuda_backend final : public sycl_backend {
enum class sycl_backend_type { generic, cuda };

/// Enumerates the SYCL backends devices are compatible with and that Celerity has been compiled with.
/// This type implements the (nameless) concept accepted by `pick_devices`.
/// This type implements the (nameless) concept accepted by `select_devices`.
struct sycl_backend_enumerator {
using backend_type = sycl_backend_type;
using device_type = sycl::device;
Expand Down
1 change: 1 addition & 0 deletions include/celerity.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "debug.h"
#include "distr_queue.h"
#include "host_utils.h"
#include "log.h"
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
#include "queue.h"
#include "side_effect.h"
#include "version.h"
Expand Down
72 changes: 72 additions & 0 deletions include/cgf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#pragma once

#include "grid.h"
#include "hint.h"
#include "range_mapper.h"
#include "ranges.h"
#include "reduction.h"
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
#include "sycl_wrappers.h"
#include "types.h"
PeterTh marked this conversation as resolved.
Show resolved Hide resolved

#include <functional>
#include <memory>
#include <optional>
#include <variant>
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
#include <vector>

#include <sycl/sycl.hpp>
PeterTh marked this conversation as resolved.
Show resolved Hide resolved

namespace celerity::detail {

class communicator;

struct task_geometry {
int dimensions = 0;
range<3> global_size{1, 1, 1};
id<3> global_offset;
PeterTh marked this conversation as resolved.
Show resolved Hide resolved
range<3> granularity{1, 1, 1};
};

struct buffer_access {
buffer_id bid = -1;
access_mode mode = access_mode::atomic;
std::unique_ptr<range_mapper_base> range_mapper;
};

struct host_object_effect {
host_object_id hoid = -1;
experimental::side_effect_order order = experimental::side_effect_order::sequential;
};

using device_kernel_launcher = std::function<void(sycl::handler& sycl_cgh, const box<3>& execution_range, const std::vector<void*>& reduction_ptrs)>;
using host_task_launcher = std::function<void(const range<3>& global_range, const box<3>& execution_range, const communicator* collective_comm)>;
using command_group_launcher = std::variant<device_kernel_launcher, host_task_launcher>;

/// Captures the raw contents of a command group function (CGF) invocation as recorded by `celerity::handler`.
/// This is passed on to `task_manager`, which validates the structure and turns it into a TDAG node.
struct raw_command_group {
std::optional<detail::task_type> task_type; ///< nullopt until a kernel or host task has been submitted
std::vector<buffer_access> buffer_accesses;
std::vector<host_object_effect> side_effects;
std::vector<reduction_info> reductions;
std::optional<detail::collective_group_id> collective_group_id;
std::optional<task_geometry> geometry;
std::optional<detail::command_group_launcher> launcher;
std::optional<std::string> task_name;
std::vector<std::unique_ptr<detail::hint_base>> hints;
};

class task_promise {
public:
task_promise() = default;
task_promise(const task_promise&) = delete;
task_promise(task_promise&&) = delete;
task_promise& operator=(const task_promise&) = delete;
task_promise& operator=(task_promise&&) = delete;
virtual ~task_promise() = default;

virtual void fulfill() = 0;
virtual allocation_id get_user_allocation_id() = 0; // TODO move to struct task instead
};

} // namespace celerity::detail
4 changes: 3 additions & 1 deletion include/cgf_diagnostics.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#pragma once

#include "task.h"
#include "cgf.h"
#include "print_utils.h"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ misc-include-cleaner ⚠️
included header print_utils.h is not used directly

Suggested change
#include "print_utils.h"


#include <optional>

#include <fmt/format.h>

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ misc-include-cleaner ⚠️
included header format.h is not used directly

Suggested change
#include <fmt/format.h>


namespace celerity::detail {

Expand Down
3 changes: 2 additions & 1 deletion include/command_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "intrusive_graph.h"
#include "ranges.h"
#include "reduction.h"
#include "task.h"
#include "types.h"

#include <cstddef>
Expand All @@ -19,6 +18,8 @@

namespace celerity::detail {

class task;

class command : public intrusive_graph_node<command>,
// Accept visitors to enable matchbox::match() on the command inheritance hierarchy
public matchbox::acceptor<class epoch_command, class horizon_command, class execution_command, class push_command, class await_push_command,
Expand Down
2 changes: 0 additions & 2 deletions include/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ namespace detail {
size_t local_rank = -1;
};

enum class tracy_mode { off, fast, full };

class config {
public:
/**
Expand Down
15 changes: 15 additions & 0 deletions include/device_selector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include <functional>
#include <variant>
#include <vector>

#include <sycl/sycl.hpp>

namespace celerity::detail {

struct auto_select_devices {};
using device_selector = std::function<int(const sycl::device&)>;
using devices_or_selector = std::variant<auto_select_devices, std::vector<sycl::device>, device_selector>;

} // namespace celerity::detail
6 changes: 4 additions & 2 deletions include/distr_queue.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#pragma once

#include "buffer.h"
#include "device_selection.h"
#include "device_selector.h"
#include "fence.h"
#include "handler.h"
#include "host_object.h"
#include "ranges.h"
#include "runtime.h"
Expand Down Expand Up @@ -69,7 +70,8 @@ class [[deprecated("Use celerity::queue instead")]] distr_queue {
void submit(CGF cgf) { // NOLINT(readability-convert-member-functions-to-static)
// (Note while this function could be made static, it must not be! Otherwise we can't be sure the runtime has been initialized.)
CELERITY_DETAIL_TRACY_ZONE_SCOPED("distr_queue::submit", Orange3);
[[maybe_unused]] const auto tid = detail::runtime::get_instance().submit(std::move(cgf));
auto cg = detail::invoke_command_group_function(std::move(cgf));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ clang-diagnostic-error ⚠️
no member named invoke_command_group_function in namespace celerity::detail

[[maybe_unused]] const auto tid = detail::runtime::get_instance().submit(std::move(cg));
CELERITY_DETAIL_TRACY_ZONE_NAME("T{} submit", tid);
}

Expand Down
17 changes: 7 additions & 10 deletions include/fence.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#pragma once

#include "buffer.h"
#include "cgf.h"
#include "host_object.h"
#include "range_mapper.h"
#include "runtime.h"
#include "sycl_wrappers.h"
#include "task.h"
#include "task_manager.h"
#include "tracy.h"
#include "utils.h"

#include <future>
#include <memory>
Expand Down Expand Up @@ -113,11 +113,10 @@ std::future<T> fence(const experimental::host_object<T>& obj) {
static_assert(std::is_object_v<T>, "host_object<T&> and host_object<void> are not allowed as parameters to fence()");
CELERITY_DETAIL_TRACY_ZONE_SCOPED("queue::fence", Green2);

detail::side_effect_map side_effects;
side_effects.add_side_effect(detail::get_host_object_id(obj), experimental::side_effect_order::sequential);
const host_object_effect effect{detail::get_host_object_id(obj), experimental::side_effect_order::sequential};

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ misc-include-cleaner ⚠️
no header providing "celerity::experimental::side_effect_order" is directly included

auto promise = std::make_unique<detail::host_object_fence_promise<T>>(detail::get_host_object_instance(obj));
auto future = promise->get_future();
[[maybe_unused]] const auto tid = detail::runtime::get_instance().fence({}, std::move(side_effects), std::move(promise));
[[maybe_unused]] const auto tid = detail::runtime::get_instance().fence(effect, std::move(promise));

CELERITY_DETAIL_TRACY_ZONE_NAME("T{} fence", tid);
return future;
Expand All @@ -127,13 +126,11 @@ template <typename DataT, int Dims>
std::future<buffer_snapshot<DataT, Dims>> fence(const buffer<DataT, Dims>& buf, const subrange<Dims>& sr) {
CELERITY_DETAIL_TRACY_ZONE_SCOPED("queue::fence", Green2);

std::vector<detail::buffer_access> accesses;
accesses.push_back(detail::buffer_access{detail::get_buffer_id(buf), access_mode::read,
std::make_unique<detail::range_mapper<Dims, celerity::access::fixed<Dims>>>(celerity::access::fixed<Dims>(sr), buf.get_range())});
detail::buffer_access access{detail::get_buffer_id(buf), access_mode::read,
std::make_unique<detail::range_mapper<Dims, celerity::access::fixed<Dims>>>(celerity::access::fixed<Dims>(sr), buf.get_range())};
auto promise = std::make_unique<detail::buffer_fence_promise<DataT, Dims>>(sr);
auto future = promise->get_future();
[[maybe_unused]] const auto tid =
detail::runtime::get_instance().fence(detail::buffer_access_map(std::move(accesses), detail::task_geometry{}), {}, std::move(promise));
[[maybe_unused]] const auto tid = detail::runtime::get_instance().fence(std::move(access), std::move(promise));

CELERITY_DETAIL_TRACY_ZONE_NAME("T{} fence", tid);
return future;
Expand Down
Loading