Skip to content

Commit

Permalink
unroll search
Browse files Browse the repository at this point in the history
  • Loading branch information
DenisYaroshevskiy committed Dec 1, 2024
1 parent 0806466 commit 0c8e8ab
Show file tree
Hide file tree
Showing 5 changed files with 588 additions and 23 deletions.
7 changes: 4 additions & 3 deletions include/eve/module/algo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@
#include <eve/module/algo/algo/copy_if.hpp>
#include <eve/module/algo/algo/equal.hpp>
#include <eve/module/algo/algo/fill.hpp>
#include <eve/module/algo/algo/find.hpp>
#include <eve/module/algo/algo/find_last.hpp>
#include <eve/module/algo/algo/for_each.hpp>
#include <eve/module/algo/algo/for_each_iteration.hpp>
#include <eve/module/algo/algo/find.hpp>
#include <eve/module/algo/algo/for_each_iteration_fixed_overflow.hpp>
#include <eve/module/algo/algo/for_each_iteration_with_expensive_optional_part.hpp>
#include <eve/module/algo/algo/for_each_iteration.hpp>
#include <eve/module/algo/algo/for_each_selected.hpp>
#include <eve/module/algo/algo/for_each.hpp>
#include <eve/module/algo/algo/inclusive_scan.hpp>
#include <eve/module/algo/algo/iota.hpp>
#include <eve/module/algo/algo/iterator_helpers.hpp>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
//==================================================================================================
/*
EVE - Expressive Vector Engine
Copyright : EVE Project Contributors
SPDX-License-Identifier: BSL-1.0
*/
//==================================================================================================
#pragma once

#include <eve/module/algo/algo/concepts.hpp>
#include <eve/module/algo/algo/traits.hpp>
#include <eve/module/core.hpp>

namespace eve::algo
{

// Reply a delegate returns from `step` to drive
// `for_each_iteration_with_expensive_optional_part`.
enum class continue_break_expensive
{
  // Nothing of interest in this chunk: keep iterating.
  continue_,
  // Stop the whole iteration right away.
  break_,
  // Run `delegate.expensive_part()`; the iteration stops if it returns true
  // and resumes otherwise.
  expensive,
};

namespace detail
{
// Shared part of the iteration strategies: an unrolled loop over full, unmasked steps
// that stops as soon as the range is exhausted or the delegate replies with anything
// other than `continue_`.
struct for_each_iteration_with_expensive_optional_part_common
{
  // Callable handed to `for_until_`; each invocation performs one full step.
  // It only holds references, so advancing `f` and writing `delegate_reply`
  // is observed by whoever created it.
  template<typename I, typename S, typename Delegate> struct small_steps_lambda
  {
    I&                        f;
    S&                        l;
    continue_break_expensive& delegate_reply;
    Delegate&                 delegate;

    // Returns true when the unrolled loop has to stop: the range is exhausted,
    // or the delegate asked for something other than `continue_`.
    template<int i> EVE_FORCEINLINE bool operator()(std::integral_constant<int, i>)
    {
      if( f != l )
      {
        delegate_reply = delegate.step(f, eve::ignore_none);
        // `f` is advanced before the reply is reported, so a caller that resumes
        // the loop later starts from the next chunk.
        f += iterator_cardinal_v<I>;

        if( delegate_reply == continue_break_expensive::continue_ ) return false;
      }
      return true;
    }
  };

  // Runs unrolled full steps over [f, l), advancing `f` in place.
  // Returns `continue_` when `f` reached `l`, otherwise the delegate's
  // `break_` / `expensive` reply that interrupted the loop.
  template<typename Traits, typename I, typename S, typename Delegate>
  EVE_FORCEINLINE continue_break_expensive
  main_loop(Traits, I& f, S l, Delegate& delegate) const
  {
    using one_step_t = small_steps_lambda<I, S, Delegate>;

    auto reply    = continue_break_expensive::continue_;
    bool finished = false;

    do {
      finished = eve::detail::for_until_<0, 1, get_unrolling<Traits>()>(
          one_step_t {f, l, reply, delegate});
    }
    while( !finished );

    return reply;
  }
};

// Strategy used when the range length is a whole number of cardinals:
// every step is a full step, there is no masked head or tail.
template<typename Traits, iterator I, sentinel_for<I> S>
struct for_each_iteration_with_expensive_optional_part_precise_f_l
    : for_each_iteration_with_expensive_optional_part_common
{
  Traits traits;
  I      base;
  I      f;
  S      l;

  // Stores the range; asserts that its length is divisible by the cardinal.
  for_each_iteration_with_expensive_optional_part_precise_f_l(Traits t, I i, S s)
      : traits(t)
      , base(i)
      , f(i)
      , l(s)
  {
    EVE_ASSERT(((l - f) % iterator_cardinal_v<I> == 0),
               " len of the range is no divisible by cardinal "
                   << "when `divisible by cardinal is passed`: " << "l - f: " << (l - f)
                   << " iterator_cardinal_v<I>: " << iterator_cardinal_v<I>);
  }

  // Drives `delegate` over the range. The main loop is re-entered after each
  // `expensive_part()` that returns false; any other outcome ends the iteration.
  template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate)
  {
    for( ;; )
    {
      const continue_break_expensive reply = this->main_loop(traits, f, l, delegate);

      // `continue_` here means the range is exhausted, `break_` means stop.
      if( reply != continue_break_expensive::expensive ) return;
      if( delegate.expensive_part() ) return;
    }
  }
};

// Strategy that starts exactly at `f` (no aligning): full steps cover the largest
// prefix whose length is a multiple of the cardinal, then one masked step
// handles whatever is left.
template<typename Traits, iterator I, sentinel_for<I> S>
struct for_each_iteration_with_expensive_optional_part_precise_f
    : for_each_iteration_with_expensive_optional_part_common
{
  Traits traits;
  I      base;
  I      f;
  S      l;

  for_each_iteration_with_expensive_optional_part_precise_f(Traits t, I i, S s)
      : traits(t)
      , base(i)
      , f(i)
      , l(s)
  {}

  // Drives `delegate` over the range. There is a single call site for
  // `delegate.expensive_part()`, shared by the full steps and the masked tail.
  template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate)
  {
    // End of the part of the range that can be covered by full steps.
    I precise_l = f + (((l - f) / iterator_cardinal_v<I>)*iterator_cardinal_v<I>);

    continue_break_expensive action = continue_break_expensive::continue_;

    for( ;; )
    {
      action = this->main_loop(traits, f, precise_l, delegate);
      if( action == continue_break_expensive::break_ ) return;

      if( action != continue_break_expensive::expensive )
      {
        // Full steps are exhausted: finish with the masked tail, if there is one.
        if( precise_l == l ) return;

        {
          eve::keep_first ignore {l - precise_l};
          action = delegate.step(f, ignore);
        }
        if( action != continue_break_expensive::expensive ) return;

        // Shrink the range so that, once the expensive part is done, the next pass
        // finds nothing left and returns through the `precise_l == l` check above.
        l = precise_l;
      }

      if( delegate.expensive_part() ) return;
    }
  }
};

// Strategy that iterates over aligned chunks: the first chunk starts at the aligned
// position at or before `f` and masks out the leading elements, the last chunk masks
// out the elements at or past `l`, and everything in between is a full step.
template<typename Traits, iterator I, sentinel_for<I> S>
struct for_each_iteration_with_expensive_optional_part_aligning
    : for_each_iteration_with_expensive_optional_part_common
{
  Traits traits;
  I      base;
  I      f;
  S      l;

  // `base` is the partially aligned position preceding `f`.
  for_each_iteration_with_expensive_optional_part_aligning(Traits traits, I f, S l)
      : traits(traits)
      , base(f.previous_partially_aligned())
      , f(f)
      , l(l)
  {}

  // Drives `delegate` over the range. `delegate.step(it, ignore)` is called per chunk;
  // when it replies `expensive`, `delegate.expensive_part()` is called from a single
  // call site, and the iteration resumes if that returns false.
  template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate)
  {
    auto aligned_f = base;
    // NOTE(review): `f + (l - f)` presumably turns the sentinel into an iterator of
    // type `I` so that it can be aligned - confirm.
    auto aligned_l = (f + (l - f)).previous_partially_aligned();

    continue_break_expensive action = continue_break_expensive::continue_;

    eve::ignore_first ignore_first {f - aligned_f};

    if( aligned_f != aligned_l )
    {
      // First chunk: masked at the front.
      action       = delegate.step(aligned_f, ignore_first);
      ignore_first = eve::ignore_first {0};

      // Advance before looking at the reply, exactly like the unrolled main loop does.
      // Otherwise resuming after an expensive part that returned false would step the
      // first chunk a second time, unmasked, exposing the elements before `f`.
      aligned_f += iterator_cardinal_v<I>;

      if( action == continue_break_expensive::break_ ) return;
      if( action == continue_break_expensive::expensive ) goto expensive_part;

    main_loop:
      // handles aligned_f == aligned_l
      action = this->main_loop(traits, aligned_f, aligned_l, delegate);
      if( action == continue_break_expensive::break_ ) return;
      if( action == continue_break_expensive::expensive ) goto expensive_part;
    }

    // No partial chunk left at the end.
    if( aligned_f == l ) { return; }

    {
      // Last chunk: masked at the back, and also at the front when the whole range
      // fits into a single chunk (the first step above was skipped).
      eve::ignore_last ignore_last {aligned_l + iterator_cardinal_v<I> - l};
      action = delegate.step(aligned_l, ignore_first && ignore_last);
    }
    if( action == continue_break_expensive::expensive )
    {
      // Hack that prevents coming back here after the expensive part: with the range
      // shrunk, the resumed main loop has nothing to do and the check above returns.
      l = aligned_l;
      goto expensive_part;
    }
    return;

  expensive_part:
    if( delegate.expensive_part() ) return;
    goto main_loop;
  }
};
}

//================================================================================================
//! @addtogroup algos
//! @{
//! @var for_each_iteration_with_expensive_optional_part
//!
//! @brief low level util for writing algorithms. A variation on for_each_iteration that has a
//! place for work we don't want duplicated in assembly.
//!
//! **Defined in Header**
//!
//! @code
//! #include <eve/module/algo.hpp>
//! @endcode
//!
//! `for_each_iteration`, even if not unrolled, generates a few copies of the
//! callback code. For some algorithms we want to move out a piece of callback code
//! but we still don't want a function call. Think search: we want to move the more
//! expensive part of validating match outside.
//!
//! You can find example usage in the search implementation.
//! @}
//================================================================================================
struct
{
template<typename Traits, iterator I, sentinel_for<I> S>
auto operator()(Traits traits, I f, S l) const
{
EVE_ASSERT(f != l,
"for_each_iteration_with_expensive_optional_part requires a non-empty range");
if constexpr( !Traits::contains(no_aligning) && !partially_aligned_iterator<I> )
{
return detail::for_each_iteration_with_expensive_optional_part_aligning {traits, f, l};
}
else if constexpr( Traits::contains(divisible_by_cardinal) )
{
return detail::for_each_iteration_with_expensive_optional_part_precise_f_l {traits, f, l};
}
else
{
return detail::for_each_iteration_with_expensive_optional_part_precise_f {traits, f, l};
}
}
} inline constexpr for_each_iteration_with_expensive_optional_part;

}
58 changes: 38 additions & 20 deletions include/eve/module/algo/algo/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//==================================================================================================
#pragma once

#include <eve/module/algo/algo/for_each_selected.hpp>
#include <eve/module/algo/algo/for_each_iteration_with_expensive_optional_part.hpp>
#include <eve/module/algo/algo/views/convert.hpp>
#include <eve/module/algo/algo/views/zip.hpp>
#include <eve/module/core.hpp>
Expand All @@ -29,13 +29,20 @@ namespace detail
*/
struct for_each_possibly_matching_for_search_
{
template<typename NeedleWide, typename Equal, typename Verify> struct delegate
template<
typename HaystackI,
typename NeedleWide,
typename Equal,
typename Verify> struct delegate
{
NeedleWide needle_front;
NeedleWide needle_back;
Equal equal_fn;
Verify& verify;
bool was_stopped = false;

bool was_stopped = false;
unaligned_t<HaystackI> pos = {};
decltype(equal_fn(wide_value_type_t<HaystackI>{}, NeedleWide{})) precheck = {};

template<typename I> EVE_FORCEINLINE auto make_verify_adapter(I haystack_it)
{
Expand All @@ -50,39 +57,48 @@ namespace detail
return res_t {verify, unalign(haystack_it)};
}

EVE_FORCEINLINE bool tail(auto zip_it, eve::relative_conditional_expr auto ignore)
EVE_FORCEINLINE auto tail(auto zip_it, eve::relative_conditional_expr auto ignore)
{
auto front_it = get<0>(zip_it);
pos = get<0>(zip_it);

// not loading from `zip_it` here, because it's much more expensive for tails.
auto haystack_front = eve::load[ignore](front_it);
eve::logical precheck = equal_fn(haystack_front, needle_front);
auto haystack_front = eve::load[ignore](pos);
precheck = equal_fn(haystack_front, needle_front);

was_stopped = eve::iterate_selected[ignore](precheck, make_verify_adapter(front_it));
return was_stopped;
if (!eve::any[ignore](precheck)) {
return continue_break_expensive::continue_;
}

precheck = precheck && ignore.mask(as(precheck));

return continue_break_expensive::expensive;
}

EVE_FORCEINLINE bool main_part(auto zip_it)
EVE_FORCEINLINE auto main_part(auto zip_it)
{
auto [haystack_front, haystack_back] = eve::load(zip_it);

eve::logical precheck =
equal_fn(haystack_front, needle_front) && equal_fn(haystack_back, needle_back);
was_stopped = eve::iterate_selected(precheck, make_verify_adapter(get<0>(zip_it)));
pos = get<0>(zip_it);
precheck = equal_fn(haystack_front, needle_front) && equal_fn(haystack_back, needle_back);

return was_stopped;
if (!eve::any(precheck)) {
return continue_break_expensive::continue_;
}

return continue_break_expensive::expensive;
}

template<eve::relative_conditional_expr C>
EVE_FORCEINLINE bool step(auto zip_it, C ignore, auto /*idx*/)
EVE_FORCEINLINE auto step(auto zip_it, C ignore)
{
if constexpr( C::is_complete && C::is_inverted ) { return main_part(zip_it); }
else { return tail(zip_it, ignore); }
}

EVE_FORCEINLINE bool unrolled_step(auto arr)
EVE_FORCEINLINE bool expensive_part()
{
return unroll_by_calling_single_step {}(arr, *this);
was_stopped = eve::iterate_selected(precheck, make_verify_adapter(pos));
return was_stopped;
}
};

Expand All @@ -103,9 +119,11 @@ namespace detail
auto haystack_front_back_range =
views::zip(as_range(haystack_f, haystack_l), unalign(haystack_f) + (needle_len - 1));

auto iteration = algo::for_each_iteration(
auto iteration = algo::for_each_iteration_with_expensive_optional_part(
traits, haystack_front_back_range.begin(), haystack_front_back_range.end());
delegate<NeedleWide, Equal, Verify> d {needle_front, needle_back, equal_fn, verify};
delegate<HaystackI, NeedleWide, Equal, Verify> d {
needle_front, needle_back, equal_fn, verify, {}, {},
};
iteration(d);
return d.was_stopped;
}
Expand Down Expand Up @@ -419,6 +437,6 @@ template<typename TraitsSupport> struct search_ : TraitsSupport
//!
//! @godbolt{doc/algo/search.cpp}
//================================================================================================
inline constexpr auto search = function_with_traits<search_>;
inline constexpr auto search = function_with_traits<search_>[eve::algo::unroll<2>];

}
Loading

0 comments on commit 0c8e8ab

Please sign in to comment.