From 3aea8103026f74296e028a5d253d969a46ae22c6 Mon Sep 17 00:00:00 2001
From: Luc Grosheintz
Date: Thu, 25 Jul 2024 14:25:54 +0200
Subject: [PATCH 1/2] Optimize chained hyperslab selection.

A common pattern for creating semi-unstructured selections is to use many
(small) RegularHyperSlab and chain them:

```
HyperSlab hyperslab;
for(auto slab : regular_hyper_slabs) {
  hyperslab |= slab;
}
```

This eventually triggers calling:

```
for(auto slab : regular_hyper_slabs) {
  auto [offset, stride, counts, blocks] = slab;
  H5Sselect_hyperslab(space_id, offset, stride, counts, blocks);
}
```

Measurements show that this has runtime that's quadratic in the number of
regular hyper slabs. This starts becoming prohibitive at 10k - 40k slabs.

We noticed that `H5Scombine_select` does not suffer from the same
performance issue. This allows us to optimize (long) chains of `Op::Or`
using divide and conquer.

The current implementation only optimizes streaks of `Op::Or`.
---
 include/highfive/bits/H5Slice_traits.hpp | 122 ++++++++++++++++++++---
 include/highfive/bits/h5s_wrapper.hpp    |  21 ++++
 tests/unit/test_high_five_selection.cpp  |  57 +++++++++++
 3 files changed, 186 insertions(+), 14 deletions(-)

diff --git a/include/highfive/bits/H5Slice_traits.hpp b/include/highfive/bits/H5Slice_traits.hpp
index 6812a0914..ed10aa23c 100644
--- a/include/highfive/bits/H5Slice_traits.hpp
+++ b/include/highfive/bits/H5Slice_traits.hpp
@@ -162,20 +162,7 @@ class HyperSlab {
     }
 
     DataSpace apply(const DataSpace& space_) const {
-        auto space = space_.clone();
-        for (const auto& sel: selects) {
-            if (sel.op == Op::None) {
-                detail::h5s_select_none(space.getId());
-            } else {
-                detail::h5s_select_hyperslab(space.getId(),
-                                             convert(sel.op),
-                                             sel.offset.empty() ? nullptr : sel.offset.data(),
-                                             sel.stride.empty() ? nullptr : sel.stride.data(),
-                                             sel.count.empty() ? nullptr : sel.count.data(),
-                                             sel.block.empty() ? nullptr : sel.block.data());
-            }
-        }
-        return space;
+        return apply_impl(space_);
     }
 
   private:
@@ -229,6 +216,113 @@ class HyperSlab {
     };
 
     std::vector<Select_> selects;
+
+  protected:
+    DataSpace select_none(const DataSpace& outer_space) const {
+        auto space = outer_space.clone();
+        detail::h5s_select_none(space.getId());
+        return space;
+    }
+
+    void select_hyperslab(DataSpace& space, const Select_& sel) const {
+        detail::h5s_select_hyperslab(space.getId(),
+                                     convert(sel.op),
+                                     sel.offset.empty() ? nullptr : sel.offset.data(),
+                                     sel.stride.empty() ? nullptr : sel.stride.data(),
+                                     sel.count.empty() ? nullptr : sel.count.data(),
+                                     sel.block.empty() ? nullptr : sel.block.data());
+    }
+
+#if H5_VERSION_GE(1, 10, 6)
+    /// The length of a streak of `Op::Or` starting at `begin`.
+    size_t detect_streak(Select_ const* begin, Select_ const* end, Op op) const {
+        assert(op == Op::Or);
+        auto it = std::find_if(begin, end, [op](const Select_& sel) { return sel.op != op; });
+        return static_cast<size_t>(it - begin);
+    }
+
+    DataSpace combine_selections(const DataSpace& left_space,
+                                 Op op,
+                                 const DataSpace& right_space) const {
+        return detail::make_data_space(
+            detail::h5s_combine_select(left_space.getId(), convert(op), right_space.getId()));
+    }
+
+    /// Reduce a sequence of `Op::Or` efficiently.
+    ///
+    /// The issue is that `H5Sselect_hyperslab` runs in time that is linear in the
+    /// number of blocks in the existing selection. Therefore, a loop that adds
+    /// slab-by-slab has quadratic runtime in the number of slabs.
+    ///
+    /// Fortunately, `H5Scombine_select` doesn't suffer from the same problem.
+    /// However, it's only available in 1.10.6 and newer.
+    ///
+    /// The solution is to use divide-and-conquer to reduce (long) streaks of
+    /// `Op::Or` in what seems to be log-linear time.
+    DataSpace reduce_streak(const DataSpace& outer_space,
+                            Select_ const* begin,
+                            Select_ const* end,
+                            Op op) const {
+        assert(op == Op::Or);
+
+        if (begin == end) {
+            throw std::runtime_error("Broken logic in 'HyperSlab::reduce_streak'.");
+        }
+
+        std::ptrdiff_t distance = end - begin;
+        if (distance == 1) {
+            auto space = select_none(outer_space);
+            select_hyperslab(space, *begin);
+            return space;
+        }
+
+        Select_ const* mid = begin + distance / 2;
+        auto left_space = reduce_streak(outer_space, begin, mid, op);
+        auto right_space = reduce_streak(outer_space, mid, end, op);
+
+        return combine_selections(left_space, op, right_space);
+    }
+
+    DataSpace apply_impl(const DataSpace& space_) const {
+        auto space = space_.clone();
+        auto n_selects = selects.size();
+        for (size_t i = 0; i < n_selects; ++i) {
+            auto begin = selects.data() + i;
+            auto end = selects.data() + n_selects;
+
+            auto n_ors = detect_streak(begin, end, Op::Or);
+
+            if (n_ors > 1) {
+                auto right_space = reduce_streak(space_, begin, begin + n_ors, Op::Or);
+                // Since HDF5 doesn't allow `combine_selections` with a None
+                // selection, we need to avoid the issue:
+                if (detail::h5s_get_select_type(space.getId()) == H5S_SEL_NONE) {
+                    space = right_space;
+                } else {
+                    space = combine_selections(space, Op::Or, right_space);
+                }
+                i += n_ors - 1;
+            } else if (selects[i].op == Op::None) {
+                detail::h5s_select_none(space.getId());
+            } else {
+                select_hyperslab(space, selects[i]);
+            }
+        }
+        return space;
+    }
+#else
+    DataSpace apply_impl(const DataSpace& space_) const {
+        auto space = space_.clone();
+        for (const auto& sel: selects) {
+            if (sel.op == Op::None) {
+                detail::h5s_select_none(space.getId());
+            } else {
+                select_hyperslab(space, sel);
+            }
+        }
+        return space;
+    }
+#endif
 };
 
 ///
diff --git a/include/highfive/bits/h5s_wrapper.hpp b/include/highfive/bits/h5s_wrapper.hpp
index 03edf8005..60a7974ca 100644
--- a/include/highfive/bits/h5s_wrapper.hpp
+++ b/include/highfive/bits/h5s_wrapper.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include
 #include
 namespace HighFive {
 namespace detail {
@@ -110,6 +111,26 @@ inline H5S_class_t h5s_get_simple_extent_type(hid_t space_id) {
     return cls;
 }
 
+inline H5S_sel_type h5s_get_select_type(hid_t space_id) {
+    H5S_sel_type type = H5Sget_select_type(space_id);
+    if (type < 0) {
+        HDF5ErrMapper::ToException("Unable to get type of selection.");
+    }
+
+    return type;
+}
+
+#if H5_VERSION_GE(1, 10, 6)
+hid_t h5s_combine_select(hid_t space1_id, H5S_seloper_t op, hid_t space2_id) {
+    auto space_id = H5Scombine_select(space1_id, op, space2_id);
+    if (space_id == H5I_INVALID_HID) {
+        HDF5ErrMapper::ToException("Unable to combine two selections.");
+    }
+
+    return space_id;
+}
+#endif
+
 } // namespace detail
 } // namespace HighFive
 
diff --git a/tests/unit/test_high_five_selection.cpp b/tests/unit/test_high_five_selection.cpp
index 586af0b17..1d6006f4c 100644
--- a/tests/unit/test_high_five_selection.cpp
+++ b/tests/unit/test_high_five_selection.cpp
@@ -25,6 +25,7 @@
 
 #include
 #include "tests_high_five.hpp"
+#include "data_generator.hpp"
 
 using namespace HighFive;
 using Catch::Matchers::Equals;
@@ -534,3 +535,59 @@ void irregularHyperSlabSelectionWriteTest() {
 TEMPLATE_LIST_TEST_CASE("irregularHyperSlabSelectionWrite", "[template]", std::tuple) {
     irregularHyperSlabSelectionWriteTest();
 }
+
+TEST_CASE("select_multiple_ors", "[hyperslab]") {
+    size_t n = 100, m = 20;
+    size_t nsel = 30;
+    auto x = testing::DataGenerator>>::create({n, m});
+
+    auto file = File("select_multiple_ors.h5", File::Truncate);
+    auto dset = file.createDataSet("x", x);
+
+    std::vector> indices;
+    auto hyperslab = HyperSlab();
+    for (size_t i = 0; i < nsel; ++i) {
+        std::vector offsets{i, i % 10};
+        std::vector counts{1, 3};
+        hyperslab |= RegularHyperSlab(offsets, counts);
+
+        for (size_t k = 0; k < counts[1]; ++k) {
+            indices.push_back({offsets[0], offsets[1] + k});
+        }
+    }
+
+    SECTION("Pure Or Chain") {
+        auto selected = dset.select(hyperslab).read>();
+        REQUIRE(selected.size() == indices.size());
+        for (size_t k = 0; k < selected.size(); ++k) {
+            size_t i = indices[k][0];
+            size_t j = indices[k][1];
+            REQUIRE(selected[k] == x[i][j]);
+        }
+    }
+
+    SECTION("Or Chain And Slab") {
+        std::vector offsets{5, 2};
+        std::vector counts{85, 12};
+
+        std::vector> selected_indices;
+        for (const auto& ij: indices) {
+            std::array ij_max = {offsets[0] + counts[0], offsets[1] + counts[1]};
+
+            if (offsets[0] <= ij[0] && ij[0] < ij_max[0] && offsets[1] <= ij[1] &&
+                ij[1] < ij_max[1]) {
+                selected_indices.push_back(ij);
+            }
+        }
+
+        hyperslab &= RegularHyperSlab(offsets, counts);
+
+        auto selected = dset.select(hyperslab).read>();
+        REQUIRE(selected.size() == selected_indices.size());
+        for (size_t k = 0; k < selected.size(); ++k) {
+            size_t i = selected_indices[k][0];
+            size_t j = selected_indices[k][1];
+            REQUIRE(selected[k] == x[i][j]);
+        }
+    }
+}

From 94727b8f0319411e0a01857b0fd563cfe8eebc51 Mon Sep 17 00:00:00 2001
From: Luc Grosheintz
Date: Fri, 26 Jul 2024 11:21:19 +0200
Subject: [PATCH 2/2] Missing inline.

---
 include/highfive/bits/h5s_wrapper.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/highfive/bits/h5s_wrapper.hpp b/include/highfive/bits/h5s_wrapper.hpp
index 60a7974ca..250ffcf6d 100644
--- a/include/highfive/bits/h5s_wrapper.hpp
+++ b/include/highfive/bits/h5s_wrapper.hpp
@@ -121,7 +121,7 @@ inline H5S_sel_type h5s_get_select_type(hid_t space_id) {
 }
 
 #if H5_VERSION_GE(1, 10, 6)
-hid_t h5s_combine_select(hid_t space1_id, H5S_seloper_t op, hid_t space2_id) {
+inline hid_t h5s_combine_select(hid_t space1_id, H5S_seloper_t op, hid_t space2_id) {
     auto space_id = H5Scombine_select(space1_id, op, space2_id);
     if (space_id == H5I_INVALID_HID) {
         HDF5ErrMapper::ToException("Unable to combine two selections.");