Skip to content

Commit

Permalink
compress copy, docs (#1653)
Browse files Browse the repository at this point in the history
  • Loading branch information
DenisYaroshevskiy authored Sep 1, 2023
1 parent 40be733 commit c7f4009
Show file tree
Hide file tree
Showing 17 changed files with 736 additions and 31 deletions.
1 change: 1 addition & 0 deletions include/eve/arch/arm/sve/top_bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ requires(current_api >= sve && !has_aggregated_abi_v<Logical>) struct top_bits<L
}

// getters/setter ----------------------
//! Returns `static_size`; usable in constant expressions.
static constexpr std::ptrdiff_t size()
{
  return static_size;
}

//! Writes `x` at index `i` by forwarding to `storage.set`.
EVE_FORCEINLINE constexpr void set(std::ptrdiff_t i, bool x)
{
  storage.set(i, x);
}
//! Reads the value at index `i` by forwarding to `storage.get`.
EVE_FORCEINLINE constexpr bool get(std::ptrdiff_t i) const
{
  return storage.get(i);
}
Expand Down
2 changes: 2 additions & 0 deletions include/eve/arch/cpu/top_bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ namespace detail

// getters/setter ----------------------

//! Returns `static_size`; usable in constant expressions.
static constexpr std::ptrdiff_t size()
{
  return static_size;
}

//! setter
EVE_FORCEINLINE constexpr void set(std::ptrdiff_t i, bool x)
{
Expand Down
2 changes: 2 additions & 0 deletions include/eve/arch/x86/top_bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ requires(current_api >= avx512 && !has_aggregated_abi_v<Logical>) struct top_bit

// getters/setter ----------------------

//! Returns `static_size`; usable in constant expressions.
static constexpr std::ptrdiff_t size()
{
  return static_size;
}

//! Writes `x` at index `i` by forwarding to `storage.set`.
EVE_FORCEINLINE constexpr void set(std::ptrdiff_t i, bool x)
{
  storage.set(i, x);
}
//! Reads the value at index `i` by forwarding to `storage.get`.
EVE_FORCEINLINE constexpr bool get(std::ptrdiff_t i) const
{
  return storage.get(i);
}

Expand Down
16 changes: 8 additions & 8 deletions include/eve/conditional.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ namespace eve

template<typename T> EVE_FORCEINLINE constexpr std::ptrdiff_t roffset(eve::as<T> const&) const
{
return cardinal_v<T>;
return T::size();
}

//! Number of lanes to be left unmasked
Expand Down Expand Up @@ -223,7 +223,7 @@ namespace eve
//! Number of lanes to be left unmasked
template<typename T> EVE_FORCEINLINE constexpr auto count(eve::as<T> const&) const
{
return cardinal_v<T>;
return T::size();
}

//! Checks equality between two eve::ignore_none_ instances
Expand Down Expand Up @@ -284,7 +284,7 @@ namespace eve

template<typename T> EVE_FORCEINLINE constexpr std::ptrdiff_t roffset(eve::as<T> const&) const
{
return cardinal_v<T> - count_;
return T::size() - count_;
}

//! Number of lanes to be left unmasked
Expand Down Expand Up @@ -347,7 +347,7 @@ namespace eve
//! Number of lanes to be left unmasked
template<typename T> EVE_FORCEINLINE constexpr auto count(eve::as<T> const&) const
{
return cardinal_v<T> - count_;
return T::size() - count_;
}

std::ptrdiff_t count_;
Expand Down Expand Up @@ -392,7 +392,7 @@ namespace eve

template<typename T> EVE_FORCEINLINE constexpr std::ptrdiff_t offset(eve::as<T> const&) const
{
return cardinal_v<T> - count_;
return T::size() - count_;
}

template<typename T> EVE_FORCEINLINE constexpr std::ptrdiff_t roffset(eve::as<T> const&) const
Expand Down Expand Up @@ -451,7 +451,7 @@ namespace eve
//! Number of lanes to be left unmasked
template<typename T> EVE_FORCEINLINE constexpr auto count(eve::as<T> const&) const
{
return cardinal_v<T> - count_;
return T::size() - count_;
}

//! Checks equality between two eve::ignore_first instances
Expand Down Expand Up @@ -505,7 +505,7 @@ namespace eve

template<typename T> EVE_FORCEINLINE constexpr std::ptrdiff_t roffset(eve::as<T> const&) const
{
return cardinal_v<T> - end_;
return T::size() - end_;
}

//! Number of lanes to be left unmasked
Expand Down Expand Up @@ -569,7 +569,7 @@ namespace eve
//! Number of lanes to be left unmasked
template<typename T> EVE_FORCEINLINE constexpr auto count(eve::as<T> const&) const
{
return cardinal_v<T> - last_count_ - first_count_;
return T::size() - last_count_ - first_count_;
}

//! Checks equality between two eve::ignore_extrema instances
Expand Down
12 changes: 6 additions & 6 deletions include/eve/module/core/compress/compress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ namespace eve
//! @endcode
//!
//! @note this is very low level function, most likely you are looking for
//! `eve::compress_copy` or `eve::compress_store`.
//! eve::compress_copy or eve::compress_store.
//!
//! @note FIX-1647: `eve::compress` doesn't support `wide<tuple>` yet.
//!
//! @note the mask type can be any logical with the same cardinal.
//! @note
//! * FIX-1647: eve::compress doesn't support `wide<tuple>` yet.
//! * the mask type can be any logical with the same cardinal.
//!
//! Compression in simd is moving selected elements to the front of the simd_value.
//! Unfortunately, not for all `simd_value`, not for all platforms that can be done
Expand Down Expand Up @@ -70,9 +70,9 @@ namespace eve
//!
//! **Parameters**
//!
//! * x - `simd_value` to compress
//! * x - simd_value to compress
//! * m - mask which marks selected elements as true
//! * ignore - optional `eve::relative_conditional_expr`, passed in `[]`.
//! * ignore - optional eve::relative_conditional_expr, passed in `[]`.
//! Ignored elements are treated as not selected.
//!
//! **Return value**
Expand Down
142 changes: 142 additions & 0 deletions include/eve/module/core/compress/compress_copy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
//==================================================================================================
/*
EVE - Expressive Vector Engine
Copyright : EVE Project Contributors
SPDX-License-Identifier: BSL-1.0
*/
//==================================================================================================
#pragma once

namespace eve
{

//================================================================================================
//! @addtogroup core_compress
//! @{
//! @var compress_copy
//! @brief A function that copies selected elements from source to destination,
//! while compressing them to the left.
//!
//! **Defined in Header**
//!
//! @code
//! #include <eve/module/core.hpp>
//! @endcode
//!
//! If this function doesn't work for you, maybe you are looking for eve::compress_store or
//! eve::compress. However this function is faster.
//!
//! You can think about this function as `std::copy_if` but instead of a predicate,
//! you pass in a logical_simd_value. Similar to `std::copy_if`, it returns
//! a pointer to where the output ended.
//! @note: you might be missing information about the last selected element written,
//! but unfortunately that adds overhead we couldn't fix (#1656)
//!
//! There are the following two modifiers:
//! * safe/unsafe - unsafe version is allowed to write up to `mask.size()` elements,
//! even if not all are selected. Those values are undefined.
//! safe is not allowed to perform those writes, at the price of
//! being slower for certain usecases.
//! * dense/sparse - whether or not you expect a lot of selected elements.
//!
//! @note `safe` version does not touch not selected elements. So, for example,
//! other threads can read/write them without a race condition.
//!
//! ## Preloaded values
//!
//! Very often the mask is computed based on the values loaded from input.
//! We would expect the optimizer to eliminate duplicated loads,
//! but for some very complex pointer-like types it might not be able to.
//!
//! So we provide overloads where you can pass an already preloaded value. It should
//! match the value loaded from `in`, otherwise the behaviour is unspecified.
//!
//! ## Masked Calls
//!
//! You can pass up to two eve::relative_conditional_expr ignore modifiers.
//! 1st is the input side ignore:
//! * the ignored elements are not loaded (same as load[ignore])
//! * they are treated as not selected, regardless of the mask value
//! 2nd is the output side ignore:
//! * elements that are ignored, will not be written. Example:
//! if the ignore_first(1) is passed, the first selected element
//! will not appear anywhere in the output and the 2nd selected
//! element will be written in (out + 1)
//! Defaults to 1st ignore.
//!
//! If the mask is all `true`, this function, in its `unsafe` variation, has the same behaviour as
//!
//! @code
//! // start with + offset
//! in += ignore_in.offset(eve::as(m));
//! out += ignore_out.offset(eve::as(m));
//!
//! eve::keep_first ignore_in1(ignore_in.count(eve::as(m)));
//! eve::keep_first ignore_out1(ignore_out.count(eve::as(m)));
//!
//! // load + store
//! auto x = eve::load[ignore_in1](in);
//! eve::store[ignore_out1](x, out);
//! @endcode
//!
//! For safe behaviour, we'd also have to make sure not to write unselected elements.
//!
//! @groupheader{Callable Signatures}
//!
//! @note - no proper concept for input and output, these are pointer like things,
//! but `eve::algo::iterator` also work here. FIX-1652
//!
//! @code
//! namespace eve
//! {
//!
//! template <relative_conditional_expr C1,
//! relative_conditional_expr C2,
//! typename I,
//! logical_simd_value L,
//! typename O>
//! auto compress_copy
//! [safe/unsafe][sparse/dense]
//! [C1 ignore_in][C2 ignore_out](
//! I in,
//! L mask,
//! O out) -> unaligned_t<O>; // (1)
//!
//! template <relative_conditional_expr C1,
//! relative_conditional_expr C2,
//! typename I,
//! logical_simd_value L,
//! typename O>
//! auto compress_copy
//! [safe/unsafe][sparse/dense]
//! [C1 ignore_in][C2 ignore_out](
//! I in,
//! wide<value_type_t<I>, fixed<L::size()>> preloaded,
//! L mask,
//! O out) -> unaligned_t<O>; // (2)
//! }
//! @endcode
//!
//! **Parameters**
//!
//! * `safe/unsafe` - required - variations described above.
//! * `sparse/dense` - optional - default to `dense`. described above.
//! * `ignore_in` - optional (default - ignore_none) - ignored elements are treated as not selected.
//! * `ignore_out` - optional (default - ignore_in) - ignored elements are not written to in the output.
//! * `in`: input ptr-like from which to copy
//! * `mask`: mask indicating selected elements
//! * `out`: output ptr-like to which the selected elements will be written
//!
//! **Return value**
//!
//! * unaligned_t<O> where result - out == number of elements written
//!
//! @groupheader{Example}
//!
//! @godbolt{doc/core/compress/compress_copy.cpp}
//! @}
//================================================================================================

}

#include <eve/module/core/compress/simd/common/compress_copy.hpp>
36 changes: 19 additions & 17 deletions include/eve/module/core/compress/compress_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,27 @@ namespace eve
//! @addtogroup core_compress
//! @{
//! @var compress_store
//! @brief A function that stores selected elements from an `eve::simd_value`
//! to an `evesimd_compatible_ptr`, while compressing them to the beginning.
//! @brief A function that stores selected elements from an eve::simd_value
//! to an eve::simd_compatible_ptr, while compressing them to the beginning.
//!
//! **Defined in Header**
//!
//! @code
//! #include <eve/module/core.hpp>
//! @endcode
//!
//! @warning you should use `eve::compress_copy` if possible, it has more opportunities
//! @warning you should use eve::compress_copy if possible, it has more opportunities
//! for optimizations.
//!
//! You can think about this function as `copy_if` for `simd_value`.
//! Similar to `copy_if`, it returns you a `ptr` after the last written element.
//! If this function is not what you are looking for, maybe eve::compress will help.
//!
//! @note many non const `eve::algo::relaxed_iterator` are `simd_compatible_ptr` and
//! can be used with `compress_store`
//! You can think about this function as `std::copy_if` for `simd_value`.
//! Similar to `std::copy_if`, it returns you a `ptr` after the last written element.
//!
//! There are 2 versions: `unsafe` and `safe`:
//! * `unsafe` is allowed to write up to `simd_value::size()` elements
//! regardless of how many elements are selected. In other words it
//! is `eve::compress` + `eve::store`.
//! is eve::compress + eve::store.
//! * `safe` version will write exactly how many elements are selected.
//! This makes it slow on most platforms.
//!
Expand All @@ -46,29 +45,32 @@ namespace eve
//!
//! ## Masked Calls
//!
//! You can pass `eve::relative_conditional_expr` ignore modifier to indicate that
//! You can pass eve::relative_conditional_expr ignore modifier to indicate that
//! some elements should not be considered.
//!
//! Passing `ignore` other than `eve::ignore_none` to `unsafe(compress_store)`
//! Passing `ignore` other than eve::ignore_none to `unsafe(compress_store)`
//! converts it into `safe`.
//!
//! Ignored elements are treated as not selected when compressing.
//! We start writing from `c.offset()`. This is the same behaviour as `eve::store`.
//! We start writing from `c.offset()`. This is the same behaviour as eve::store.
//!
//! @code
//! ignore_first(1), [a, b, c, d], (true, true, false, true)
//! output: [_, b, d, _] - here _ indicates previous value that was not modified.
//! @endcode
//!
//!
//! @note passing `ignore` other than `ignore_none` to `unsafe(compress_store)`
//! @note passing `ignore` other than eve::ignore_none to `unsafe(compress_store)`
//! is making it `safe`.
//! As soon as we start not writing some elements, it doesn't cost us extra.
//! Plus it simplifies writing code for ranges with unequal length.
//!
//! Ignored elements are treated as not selected when compressing.
//! We start writing from `c.offset()`.
//!
//! @code
//! ignore_first(1), [a, b, c, d], (true, true, false, true)
//! output: [_, b, d, _] - here _ indicates previous value that was not modified.
//! @endcode
//!
//! Another explanation:
//! `unsafe(compress_store[ignore])(x, x != 0)` behaves exactly like `store[ignore]`
Expand All @@ -80,22 +82,22 @@ namespace eve
//! namespace eve
//! {
//! template <simd_value T, logical_simd_value L, simd_compatible_ptr<T> Ptr>
//! unalign_t<Ptr> unsafe(compress_store)(T x, L m, Ptr ptr) // (1)
//! unaligned_t<Ptr> unsafe(compress_store)(T x, L m, Ptr ptr) // (1)
//!
//! template <relative_conditional_expr C,
//! simd_value T,
//! logical_simd_value L,
//! simd_compatible_ptr<T> Ptr>
//! unalign_t<Ptr> unsafe(compress_store[C ignore])(T x, L m, Ptr ptr) // (2)
//! unaligned_t<Ptr> unsafe(compress_store[C ignore])(T x, L m, Ptr ptr) // (2)
//!
//! template <simd_value T, logical_simd_value L, simd_compatible_ptr<T> Ptr>
//! unalign_t<Ptr> safe(compress_store)(T x, L m, Ptr ptr) // (3)
//! unaligned_t<Ptr> safe(compress_store)(T x, L m, Ptr ptr) // (3)
//!
//! template <relative_conditional_expr C,
//! simd_value T,
//! logical_simd_value L,
//! simd_compatible_ptr<T> Ptr>
//! unalign_t<Ptr> safe(compress_store[C ignore])(T x, L m, Ptr ptr) // (4)
//! unaligned_t<Ptr> safe(compress_store[C ignore])(T x, L m, Ptr ptr) // (4)
//! }
//! @endcode
//!
Expand Down
1 change: 1 addition & 0 deletions include/eve/module/core/compress/core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
#pragma once

#include <eve/module/core/compress/compress.hpp>
#include <eve/module/core/compress/compress_copy.hpp>
#include <eve/module/core/compress/compress_store.hpp>
Loading

0 comments on commit c7f4009

Please sign in to comment.