From 0c8e8ab670d69c5c8c210bd96d1aab55a704bea0 Mon Sep 17 00:00:00 2001 From: Denis Yaroshevskiy Date: Mon, 25 Nov 2024 13:50:00 +0000 Subject: [PATCH] unroll search --- include/eve/module/algo.hpp | 7 +- ...iteration_with_expensive_optional_part.hpp | 250 +++++++++++++++ include/eve/module/algo/algo/search.hpp | 58 ++-- ...iteration_with_expensive_optional_part.cpp | 289 ++++++++++++++++++ test/unit/module/algo/iteration_test.hpp | 7 + 5 files changed, 588 insertions(+), 23 deletions(-) create mode 100644 include/eve/module/algo/algo/for_each_iteration_with_expensive_optional_part.hpp create mode 100644 test/unit/module/algo/for_each_iteration_with_expensive_optional_part.cpp diff --git a/include/eve/module/algo.hpp b/include/eve/module/algo.hpp index 0b25b375b8..2ee178ce8d 100644 --- a/include/eve/module/algo.hpp +++ b/include/eve/module/algo.hpp @@ -30,12 +30,13 @@ #include #include #include -#include #include -#include -#include +#include #include +#include +#include #include +#include #include #include #include diff --git a/include/eve/module/algo/algo/for_each_iteration_with_expensive_optional_part.hpp b/include/eve/module/algo/algo/for_each_iteration_with_expensive_optional_part.hpp new file mode 100644 index 0000000000..a6793f948d --- /dev/null +++ b/include/eve/module/algo/algo/for_each_iteration_with_expensive_optional_part.hpp @@ -0,0 +1,250 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +namespace eve::algo +{ + +enum class continue_break_expensive { + continue_, + break_, + expensive, +}; + +namespace detail +{ + struct for_each_iteration_with_expensive_optional_part_common + { + template + struct small_steps_lambda + { + I& f; + S& l; 
+ continue_break_expensive& delegate_reply; + Delegate& delegate; + + template + EVE_FORCEINLINE bool operator()(std::integral_constant) + { + if (f == l) return true; + + delegate_reply = delegate.step(f, eve::ignore_none); + f += iterator_cardinal_v; + + return delegate_reply != continue_break_expensive::continue_; + } + }; + + template + EVE_FORCEINLINE continue_break_expensive main_loop(Traits, + I &f, + S l, + Delegate &delegate) const { + auto delegate_reply = continue_break_expensive::continue_; + while (true) { + if (eve::detail::for_until_<0, 1, get_unrolling()>( + small_steps_lambda{f, l, delegate_reply, delegate} + )) { + return delegate_reply; + } + } + } + }; + + template S> + struct for_each_iteration_with_expensive_optional_part_precise_f_l + : for_each_iteration_with_expensive_optional_part_common + { + Traits traits; + I base; + I f; + S l; + + for_each_iteration_with_expensive_optional_part_precise_f_l(Traits t, I i, S s) + : traits(t) + , base(i) + , f(i) + , l(s) + { + EVE_ASSERT(((l - f) % iterator_cardinal_v == 0), + " len of the range is no divisible by cardinal " + << "when `divisible by cardinal is passed`: " << "l - f: " << (l - f) + << " iterator_cardinal_v: " << iterator_cardinal_v); + } + + template EVE_FORCEINLINE void operator()(Delegate& delegate) + { + continue_break_expensive action; + while( true ) + { + action = this->main_loop(traits, f, l, delegate); + if( action == continue_break_expensive::expensive) { + if( !delegate.expensive_part() ) { + continue; + } + } + return; + } + } + }; + + template S> + struct for_each_iteration_with_expensive_optional_part_precise_f + : for_each_iteration_with_expensive_optional_part_common + { + Traits traits; + I base; + I f; + S l; + + for_each_iteration_with_expensive_optional_part_precise_f(Traits t, I i, S s) + : traits(t) + , base(i) + , f(i) + , l(s) + {} + + template EVE_FORCEINLINE void operator()(Delegate& delegate) + { + I precise_l = f + (((l - f) / 
iterator_cardinal_v)*iterator_cardinal_v); + + continue_break_expensive action = continue_break_expensive::continue_; + + main_loop: + action = this->main_loop(traits, f, precise_l, delegate); + if( action == continue_break_expensive::break_ ) return; + if( action == continue_break_expensive::expensive ) goto expensive_part; + + if( precise_l == l ) return; + { + eve::keep_first ignore {l - precise_l}; + action = delegate.step(f, ignore); + } + + if( action == continue_break_expensive::expensive ) { + // hack to exit after the `expensive_part` without any extra checks. + l = precise_l; + goto expensive_part; + } + return; + + expensive_part: + if( delegate.expensive_part() ) return; + goto main_loop; + } + }; + + template S> + struct for_each_iteration_with_expensive_optional_part_aligning + : for_each_iteration_with_expensive_optional_part_common + { + Traits traits; + I base; + I f; + S l; + + for_each_iteration_with_expensive_optional_part_aligning(Traits traits, I f, S l) + : traits(traits) + , base(f.previous_partially_aligned()) + , f(f) + , l(l) + {} + + template EVE_FORCEINLINE void operator()(Delegate& delegate) + { + auto aligned_f = base; + auto aligned_l = (f + (l - f)).previous_partially_aligned(); + + continue_break_expensive action = continue_break_expensive::continue_; + + eve::ignore_first ignore_first {f - aligned_f}; + + if( aligned_f != aligned_l ) + { + action = delegate.step(aligned_f, ignore_first); + ignore_first = eve::ignore_first {0}; + + if( action == continue_break_expensive::break_ ) return; + if( action == continue_break_expensive::expensive ) goto expensive_part; + aligned_f += iterator_cardinal_v; + + main_loop: + // handles aligned_f == aligned_l + action = this->main_loop(traits, aligned_f, aligned_l, delegate); + if( action == continue_break_expensive::break_ ) return; + if( action == continue_break_expensive::expensive ) goto expensive_part; + } + + if( aligned_f == l ) { return; } + + { + eve::ignore_last ignore_last {aligned_l 
+ iterator_cardinal_v - l}; + action = delegate.step(aligned_l, ignore_first && ignore_last); + } + if( action == continue_break_expensive::expensive ) { + l = aligned_l; // hack that prevents coming here after the expensive part + goto expensive_part; + } + return; + + expensive_part: + if( delegate.expensive_part() ) return; + goto main_loop; + } + }; +} + +//================================================================================================ +//! @addtogroup algos +//! @{ +//! @var for_each_iteration_with_expensive_optional_part +//! +//! @brief low level util for writing algorithms. A variation on for_each_iteration that has a +//! place for work we don't want duplicated in assembly. +//! +//! **Defined in Header** +//! +//! @code +//! #include +//! @endcode +//! +//! `for_each_iteration`, even if not unrolled, generates a few copies of the +//! callback code. For some algorithms we want to move out a piece of callback code +//! but we still don't want a function call. Think search: we want to move the more +//! expensive part of validating match outside. +//! +//! You can find example usage in the search implementation. +//! 
@} +//================================================================================================ +struct +{ + template S> + auto operator()(Traits traits, I f, S l) const + { + EVE_ASSERT(f != l, + "for_each_iteration_with_expensive_optional_part requires a non-empty range"); + if constexpr( !Traits::contains(no_aligning) && !partially_aligned_iterator ) + { + return detail::for_each_iteration_with_expensive_optional_part_aligning {traits, f, l}; + } + else if constexpr( Traits::contains(divisible_by_cardinal) ) + { + return detail::for_each_iteration_with_expensive_optional_part_precise_f_l {traits, f, l}; + } + else + { + return detail::for_each_iteration_with_expensive_optional_part_precise_f {traits, f, l}; + } + } +} inline constexpr for_each_iteration_with_expensive_optional_part; + +} diff --git a/include/eve/module/algo/algo/search.hpp b/include/eve/module/algo/algo/search.hpp index 5e0fbaa1ea..e4593c6294 100644 --- a/include/eve/module/algo/algo/search.hpp +++ b/include/eve/module/algo/algo/search.hpp @@ -7,7 +7,7 @@ //================================================================================================== #pragma once -#include +#include #include #include #include @@ -29,13 +29,20 @@ namespace detail */ struct for_each_possibly_matching_for_search_ { - template struct delegate + template< + typename HaystackI, + typename NeedleWide, + typename Equal, + typename Verify> struct delegate { NeedleWide needle_front; NeedleWide needle_back; Equal equal_fn; Verify& verify; - bool was_stopped = false; + + bool was_stopped = false; + unaligned_t pos = {}; + decltype(equal_fn(wide_value_type_t{}, NeedleWide{})) precheck = {}; template EVE_FORCEINLINE auto make_verify_adapter(I haystack_it) { @@ -50,39 +57,48 @@ namespace detail return res_t {verify, unalign(haystack_it)}; } - EVE_FORCEINLINE bool tail(auto zip_it, eve::relative_conditional_expr auto ignore) + EVE_FORCEINLINE auto tail(auto zip_it, eve::relative_conditional_expr auto ignore) { - auto 
front_it = get<0>(zip_it); + pos = get<0>(zip_it); // not loading from `zip_it` here, becasue it's much more expensive for tails. - auto haystack_front = eve::load[ignore](front_it); - eve::logical precheck = equal_fn(haystack_front, needle_front); + auto haystack_front = eve::load[ignore](pos); + precheck = equal_fn(haystack_front, needle_front); - was_stopped = eve::iterate_selected[ignore](precheck, make_verify_adapter(front_it)); - return was_stopped; + if (!eve::any[ignore](precheck)) { + return continue_break_expensive::continue_; + } + + precheck = precheck && ignore.mask(as(precheck)); + + return continue_break_expensive::expensive; } - EVE_FORCEINLINE bool main_part(auto zip_it) + EVE_FORCEINLINE auto main_part(auto zip_it) { auto [haystack_front, haystack_back] = eve::load(zip_it); - eve::logical precheck = - equal_fn(haystack_front, needle_front) && equal_fn(haystack_back, needle_back); - was_stopped = eve::iterate_selected(precheck, make_verify_adapter(get<0>(zip_it))); + pos = get<0>(zip_it); + precheck = equal_fn(haystack_front, needle_front) && equal_fn(haystack_back, needle_back); - return was_stopped; + if (!eve::any(precheck)) { + return continue_break_expensive::continue_; + } + + return continue_break_expensive::expensive; } template - EVE_FORCEINLINE bool step(auto zip_it, C ignore, auto /*idx*/) + EVE_FORCEINLINE auto step(auto zip_it, C ignore) { if constexpr( C::is_complete && C::is_inverted ) { return main_part(zip_it); } else { return tail(zip_it, ignore); } } - EVE_FORCEINLINE bool unrolled_step(auto arr) + EVE_FORCEINLINE bool expensive_part() { - return unroll_by_calling_single_step {}(arr, *this); + was_stopped = eve::iterate_selected(precheck, make_verify_adapter(pos)); + return was_stopped; } }; @@ -103,9 +119,11 @@ namespace detail auto haystack_front_back_range = views::zip(as_range(haystack_f, haystack_l), unalign(haystack_f) + (needle_len - 1)); - auto iteration = algo::for_each_iteration( + auto iteration = 
algo::for_each_iteration_with_expensive_optional_part( traits, haystack_front_back_range.begin(), haystack_front_back_range.end()); - delegate d {needle_front, needle_back, equal_fn, verify}; + delegate d { + needle_front, needle_back, equal_fn, verify, {}, {}, + }; iteration(d); return d.was_stopped; } @@ -419,6 +437,6 @@ template struct search_ : TraitsSupport //! //! @godbolt{doc/algo/search.cpp} //================================================================================================ -inline constexpr auto search = function_with_traits; +inline constexpr auto search = function_with_traits[eve::algo::unroll<2>]; } diff --git a/test/unit/module/algo/for_each_iteration_with_expensive_optional_part.cpp b/test/unit/module/algo/for_each_iteration_with_expensive_optional_part.cpp new file mode 100644 index 0000000000..5bedd67f4e --- /dev/null +++ b/test/unit/module/algo/for_each_iteration_with_expensive_optional_part.cpp @@ -0,0 +1,289 @@ +//================================================================================================== +/** + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +**/ +//================================================================================================== + +#include "unit/module/algo/algo_test.hpp" + +#include + +#include +#include + +namespace { + +struct fixture +{ + fixture() + { + data.fill(0); + data[0] = data[1] = data[2] = data[3] = '_'; + } + + auto aligned_begin() + { + using ap = eve::aligned_ptr>; + return eve::algo::ptr_iterator> {ap(data.begin())}; + } + + auto aligned_end() { return aligned_begin() + data.size(); } + + auto unaligned_begin() { return eve::unalign(aligned_begin()); } + auto unaligned_end() { return eve::unalign(aligned_end()); } + + std::string_view res() { return data.data(); } + + alignas(64) std::array data; +}; + +struct test_delegate { + char* data; + std::vector where_to_expensive; + std::ptrdiff_t expensive_returns_true_at; 
+ std::ptrdiff_t stop_at = -1; + + std::ptrdiff_t where_to_expensive_pos = 0; + char* remembered_expesnive = nullptr; + + test_delegate( + char* data, + std::vector where_to_expensive, + std::ptrdiff_t expensive_returns_true_at = -1, + std::ptrdiff_t stop_at = -1 + ) : data(data), + where_to_expensive(where_to_expensive), + expensive_returns_true_at(expensive_returns_true_at), + stop_at(stop_at) {} + + eve::algo::continue_break_expensive step(auto it, auto ignore) { + + auto tgt = eve::as>> {}; + char *ptr = it.ptr; + + std::ptrdiff_t it_idx = it.ptr - data; + std::cerr << "step: it idx: " << it_idx << " ignore: " << ignore << std::endl; + + for( std::ptrdiff_t i = ignore.offset(tgt); i; --i ) { *ptr++ = 'i'; } + for( std::ptrdiff_t i = ignore.count(tgt); i; --i ) { + *ptr++ = 'a'; + } + for( std::ptrdiff_t i = ignore.roffset(tgt); i; --i ) { *ptr++ = 'i'; } + + if (where_to_expensive_pos < std::ssize(where_to_expensive)) { + auto next_expensive = where_to_expensive[where_to_expensive_pos]; + + if ( it_idx <= next_expensive && next_expensive < it_idx + 4 ) { + remembered_expesnive = data + next_expensive; + return eve::algo::continue_break_expensive::expensive; + } + } + + if ( stop_at != -1 && stop_at < it_idx + 4) { + return eve::algo::continue_break_expensive::break_; + } + return eve::algo::continue_break_expensive::continue_; + } + + bool expensive_part() { + *remembered_expesnive = 'e'; + if (remembered_expesnive - data == expensive_returns_true_at) { + return true; + } + return false; + } +}; + +template +struct run_test_impl { + int offset; + int size; + std::vector where_to_expensive; + int expensive_returns_true_at; + int stop_at; + + + template + std::string run_impl( + auto& fix, + auto f, + auto l + ) { + test_delegate d { + fix.data.data(), + where_to_expensive, + expensive_returns_true_at, + stop_at + }; + + auto iter = eve::algo::for_each_iteration_with_expensive_optional_part( + []{ + if constexpr (align && !divisible) { + return 
eve::algo::traits{eve::algo::unroll}; + } else if constexpr (align && divisible) { + return eve::algo::traits{eve::algo::divisible_by_cardinal, eve::algo::unroll}; + } else if constexpr (!align && !divisible) { + return eve::algo::traits{eve::algo::no_aligning, eve::algo::unroll}; + } else if constexpr (!align && divisible) { + return eve::algo::traits{ + eve::algo::no_aligning, eve::algo::divisible_by_cardinal, eve::algo::unroll + }; + } + }(), + f, + l + ); + iter(d); + return std::string(fix.res()); + } + + std::string operator()() { + std::string res; + { + fixture fix; + auto f = fix.unaligned_begin() + offset; + auto l = f + size; + res = run_impl(fix, f, l); + } + + // just aligned + if (offset % 4 == 0) { + std::string res1; + fixture fix; + auto f = fix.aligned_begin() + offset; + auto l = eve::unalign(f) + size; + res1 = run_impl(fix, f, l); + TTS_EQUAL(res, res1); + } + + // just divisible + if (size % 4 == 0) { + std::string res1; + fixture fix; + auto f = fix.unaligned_begin() + offset; + auto l = eve::unalign(f) + size; + res1 = run_impl(fix, f, l); + TTS_EQUAL(res, res1); + } + + // 1. align/unalign divisible + if (offset % 4 == 0 && size % 4 == 0) { + std::string res1; + fixture fix; + auto f = fix.aligned_begin() + offset; + auto l = eve::unalign(f) + size; + res1 = run_impl(fix, f, l); + TTS_EQUAL(res, res1); + } + + // 2. 
aligned both ends + if (offset % 4 == 0 && size % 4 == 0) { + std::string res1; + fixture fix; + auto f = fix.aligned_begin() + offset; + auto l = f + size; + res1 = run_impl(fix, f, l); + TTS_EQUAL(res, res1); + } + + return res; + } +}; + +template +std::string +run_test( + int offset, + int size, + std::vector where_to_expensive = {}, + int expensive_returns_true_at = -1, + int stop_at = -1 +) { + std::string unroll1 = run_test_impl{ + offset, + size, + where_to_expensive, + expensive_returns_true_at, + stop_at + }(); + + std::string unroll2 = run_test_impl{ + offset, + size, + where_to_expensive, + expensive_returns_true_at, + stop_at + }(); + + std::string unroll4 = run_test_impl{ + offset, + size, + where_to_expensive, + expensive_returns_true_at, + stop_at + }(); + + TTS_EQUAL(unroll1, unroll2); + TTS_EQUAL(unroll1, unroll4); + return unroll1; +} + +} // namespace + +TTS_CASE("eve.algo.for_each_iteration_with_expensive_optional_part, aligning, no matches") { + TTS_EQUAL(run_test(0, 1), "aiii"); + TTS_EQUAL(run_test(0, 2), "aaii"); + TTS_EQUAL(run_test(1, 1), "iaii"); + TTS_EQUAL(run_test(1, 2), "iaai"); + TTS_EQUAL(run_test(1, 3), "iaaa"); + + TTS_EQUAL(run_test(0, 8), + "aaaa" + "aaaa"); + + TTS_EQUAL(run_test(1, 14), + "iaaa" + "aaaa" + "aaaa" + "aaai"); +}; + +TTS_CASE("eve.algo.for_each_iteration_with_expensive_optional_part, no aligning, no matches") { + TTS_EQUAL(run_test(0, 1), "aiii"); + TTS_EQUAL(run_test(0, 2), "aaii"); + TTS_EQUAL(run_test(1, 1), "_aiii"); + TTS_EQUAL(run_test(1, 2), "_aaii"); + TTS_EQUAL(run_test(1, 3), "_aaai"); + + TTS_EQUAL(run_test(0, 8), + "aaaa" + "aaaa"); + + TTS_EQUAL(run_test(1, 8), + "_" + "aaaa" + "aaaa"); + + TTS_EQUAL(run_test(1, 14), + "_" + "aaaa" + "aaaa" + "aaaa" + "aaii"); +}; + +TTS_CASE("eve.algo.for_each_iteration_with_expensive_optional_part, aligning, some expensive") { + TTS_EQUAL(run_test( + 0, 1, + /*where to expensive*/ {0} + ), + "eiii"); + TTS_EQUAL(run_test( + 0, 4, + /*where to expensive*/ {0, 1, 2, 
3} + ), + "eaaa"); + +}; diff --git a/test/unit/module/algo/iteration_test.hpp b/test/unit/module/algo/iteration_test.hpp index 310eb964e3..088ae175b6 100644 --- a/test/unit/module/algo/iteration_test.hpp +++ b/test/unit/module/algo/iteration_test.hpp @@ -1,3 +1,10 @@ +//================================================================================================== +/** + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +**/ +//================================================================================================== #include "unit/module/algo/algo_test.hpp"