diff --git a/CMakeLists.txt b/CMakeLists.txt
index e03743ab..58929c8b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,7 +8,7 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) # turn on folder hierarchies
 
 include (cmake/Flags.cmake)
 
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
 
 if(EXISTS ${CMAKE_BINARY_DIR}/conanbuildinfo.cmake)
     include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake)
diff --git a/conanfile.py b/conanfile.py
index 321ea4cb..74b61bc2 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -8,7 +8,7 @@
 
 class SISLConan(ConanFile):
     name = "sisl"
-    version = "10.2.1"
+    version = "10.2.2"
 
     homepage = "https://github.com/eBay/sisl"
     description = "Library for fast data structures, utilities"
diff --git a/include/sisl/fds/bitword.hpp b/include/sisl/fds/bitword.hpp
index d65d021b..69c910eb 100644
--- a/include/sisl/fds/bitword.hpp
+++ b/include/sisl/fds/bitword.hpp
@@ -398,6 +398,20 @@ class Bitword {
         }
     }
 
+    /// @brief Looks for a set bit at or below @p start, using the bits obtained from extract(0, start + 1).
+    /// @param start Highest bit position to consider; CompactBitSet passes bits() to cover a whole word
+    /// @param p_set_bit Out param, written only on success with logBase2() of the extracted bits, which
+    ///        CompactBitSet treats as the closest set bit at or below start
+    /// @return true if any of the extracted bits is set, false otherwise
+    bool get_prev_set_bit(uint8_t start, uint8_t* p_set_bit) const {
+        assert(p_set_bit);
+        const word_t e{extract(0, start + 1)};
+        if (e == 0) { return false; }
+
+        *p_set_bit = logBase2(e);
+        return true;
+    }
+
     uint8_t get_next_reset_bits(const uint8_t start, uint8_t* const pcount) const {
         assert(start < bits());
         assert(pcount);
@@ -536,10 +550,10 @@ class Bitword {
 
     std::string to_string() const {
         std::ostringstream oSS{};
-        const word_t e{m_bits.get()};
-        word_t mask{static_cast< word_t >(bit_mask[bits() - 1])};
-        for (uint8_t bit{0}; bit < bits(); ++bit, mask >>= 1) {
-            oSS << (((e & mask) == mask) ? '1' : '0');
+        const word_t e = m_bits.get();
+        // Walk bit_mask[] from index 0 upwards, one character per bit
+        for (uint8_t bit{0}; bit < bits(); ++bit) {
+            oSS << (((e & bit_mask[bit]) == bit_mask[bit]) ? '1' : '0');
         }
         return oSS.str();
     }
diff --git a/include/sisl/fds/buffer.hpp b/include/sisl/fds/buffer.hpp
index 972eccb1..505fc603 100644
--- a/include/sisl/fds/buffer.hpp
+++ b/include/sisl/fds/buffer.hpp
@@ -37,7 +37,7 @@ struct blob {
     uint32_t size;
 
     blob() : blob{nullptr, 0} {}
-    blob(uint8_t* const b, const uint32_t s) : bytes{b}, size{s} {}
+    blob(uint8_t* b, uint32_t s) : bytes{b}, size{s} {}
 };
 
 using sg_iovs_t = folly::small_vector< iovec, 4 >;
diff --git a/include/sisl/fds/compact_bitset.hpp b/include/sisl/fds/compact_bitset.hpp
new file mode 100644
index 00000000..53c9df02
--- /dev/null
+++ b/include/sisl/fds/compact_bitset.hpp
@@ -0,0 +1,234 @@
+/*********************************************************************************
+ * Modifications Copyright 2017-2019 eBay Inc.
+ *
+ * Author/Developer(s): Harihara Kadayam
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *    https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ *********************************************************************************/
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <string>
+#include <utility>
+
+#include <sisl/fds/bitword.hpp>
+#include <sisl/fds/buffer.hpp>
+#include <sisl/fds/utils.hpp>
+#include <sisl/logging/logging.h>
+
+namespace sisl {
+/// @brief Bitset over a flat array of 64-bit words. The words live either in a buffer allocated (and later
+/// released) by this object or in caller-provided memory that this object only refers to.
+class CompactBitSet {
+public:
+    using bit_count_t = uint32_t;
+
+private:
+    using bitword_type = Bitword< unsafe_bits< uint64_t > >;
+
+    // Overlay for the raw buffer: words[] is indexed beyond its declared length to reach every word in it
+    struct serialized {
+        bitword_type words[1]{bitword_type{}};
+    };
+
+    bit_count_t nbits_{0};   // number of bits covered by the buffer
+    bool allocated_{false};  // true only if s_ was allocated by this object and must be freed by it
+    serialized* s_{nullptr}; // words backing the bitset
+
+    static constexpr size_t word_size_bytes() { return sizeof(unsafe_bits< uint64_t >); }
+    static constexpr size_t word_size_bits() { return word_size_bytes() * 8; }
+    static constexpr uint64_t word_mask() { return bitword_type::bits() - 1; }
+
+public:
+    static constexpr bit_count_t inval_bit = std::numeric_limits< bit_count_t >::max();
+    static constexpr uint8_t size_multiples() { return word_size_bytes(); }
+
+    /// @brief Creates a bitset backed by its own zero-filled buffer.
+    /// @param nbits Requested number of bits (> 0); rounded up to a whole number of words, so size() can be larger
+    explicit CompactBitSet(bit_count_t nbits) {
+        DEBUG_ASSERT_GT(nbits, 0, "compact bitset should have nbits > 0");
+        nbits_ = s_cast< bit_count_t >(sisl::round_up(nbits, word_size_bits()));
+        size_t const buf_size = nbits_ / 8;
+
+        uint8_t* buf = new uint8_t[buf_size];
+        std::memset(buf, 0, buf_size);
+        s_ = r_cast< serialized* >(buf);
+        allocated_ = true;
+    }
+
+    /// @brief Creates a bitset on top of caller-provided memory; the memory is neither copied nor freed here.
+    /// @param buf Buffer holding the bits; its size must be a non-zero multiple of size_multiples()
+    /// @param init_bits true to zero the buffer first, false to use its current contents as the bit values
+    CompactBitSet(sisl::blob const& buf, bool init_bits) : s_{r_cast< serialized* >(buf.bytes)} {
+        DEBUG_ASSERT_GT(buf.size, 0, "compact bitset initialized with empty buffer");
+        DEBUG_ASSERT_EQ(buf.size % word_size_bytes(), 0, "compact bitset buffer size must be multiple of word size");
+        nbits_ = buf.size * 8;
+        if (init_bits) { std::memset(buf.bytes, 0, buf.size); }
+    }
+
+    /// @brief Copies a bitset. A bitset that owns its buffer is deep copied, because sharing the pointer would
+    /// make both destructors delete[] the same memory; a bitset over caller memory is copied as another view
+    /// of that same memory.
+    CompactBitSet(CompactBitSet const& other) : nbits_{other.nbits_}, allocated_{other.allocated_}, s_{other.s_} {
+        if (allocated_) {
+            size_t const buf_size = nbits_ / 8;
+            uint8_t* buf = new uint8_t[buf_size];
+            std::memcpy(buf, other.s_, buf_size);
+            s_ = r_cast< serialized* >(buf);
+        }
+    }
+
+    /// @brief Takes over the buffer of @p other, which is left empty (size() == 0).
+    CompactBitSet(CompactBitSet&& other) noexcept :
+            nbits_{std::exchange(other.nbits_, 0)},
+            allocated_{std::exchange(other.allocated_, false)},
+            s_{std::exchange(other.s_, nullptr)} {}
+
+    /// @brief Copy and move assignment in one: @p other is built by the copy or move constructor, then swapped
+    /// in, and releases the previous contents of this object when it goes out of scope.
+    CompactBitSet& operator=(CompactBitSet other) noexcept {
+        std::swap(nbits_, other.nbits_);
+        std::swap(allocated_, other.allocated_);
+        std::swap(s_, other.s_);
+        return *this;
+    }
+
+    ~CompactBitSet() {
+        if (allocated_) { delete[] uintptr_cast(s_); }
+    }
+
+    bit_count_t size() const { return nbits_; }
+    void set_bit(bit_count_t start) { set_reset_bit(start, true); }
+    void reset_bit(bit_count_t start) { set_reset_bit(start, false); }
+
+    /// @return true if @p bit is set; false if it is reset or lies outside the bitset
+    bool is_bit_set(bit_count_t bit) const {
+        bitword_type const* word_ptr = get_word_const(bit);
+        if (!word_ptr) { return false; }
+
+        uint8_t const offset = get_word_offset(bit);
+        return word_ptr->is_bit_set_reset(offset, true);
+    }
+
+    bit_count_t get_next_set_bit(bit_count_t start_bit) const { return get_next_set_or_reset_bit(start_bit, true); }
+    bit_count_t get_next_reset_bit(bit_count_t start_bit) const { return get_next_set_or_reset_bit(start_bit, false); }
+
+    /// @brief Gets the closest set bit at or before the starting bit. The start bit itself is included, so if
+    /// the start bit is set it is the one returned.
+    /// @param start_bit Bit to search down from; anything >= size() is treated as the last bit of the bitset
+    /// @return Returns the previous set bit or inval_bit if nothing is set
+    bit_count_t get_prev_set_bit(bit_count_t start_bit) const {
+        if (nbits_ == 0) { return inval_bit; }
+
+        // start_bit == size() is a documented input; clamp it so we never index a word past the buffer
+        start_bit = std::min< bit_count_t >(start_bit, nbits_ - 1);
+
+        // check first word which may be partial
+        uint8_t offset = get_word_offset(start_bit);
+        bit_count_t word_idx = get_word_index(start_bit);
+
+        do {
+            uint8_t nbit{0};
+            if (s_->words[word_idx].get_prev_set_bit(offset, &nbit)) { return start_bit - (offset - nbit); }
+
+            // Nothing found in this word: rebase start_bit and search the whole of the preceding word
+            start_bit -= offset;
+            offset = bitword_type::bits();
+        } while (word_idx-- != 0);
+
+        return inval_bit;
+    }
+
+    /// @brief Sets (value == true) or resets (value == false) a single bit; out of range bits are ignored
+    void set_reset_bit(bit_count_t bit, bool value) {
+        bitword_type* word_ptr = get_word(bit);
+        if (!word_ptr) { return; }
+
+        uint8_t const offset = get_word_offset(bit);
+        word_ptr->set_reset_bits(offset, 1, value);
+    }
+
+    /// @brief Finds the first bit at or after @p start_bit that is set (or reset).
+    /// @param start_bit First bit to examine
+    /// @param search_for_set_bit true to look for a set bit, false to look for a reset bit
+    /// @return Position of the bit found, or inval_bit if there is none or start_bit is out of range
+    bit_count_t get_next_set_or_reset_bit(bit_count_t start_bit, bool search_for_set_bit) const {
+        bit_count_t ret{inval_bit};
+
+        // check first word which may be partial
+        uint8_t const offset = get_word_offset(start_bit);
+        bitword_type const* word_ptr = get_word_const(start_bit);
+        if (!word_ptr) { return ret; }
+
+        uint8_t nbit{0};
+        bool found = search_for_set_bit ? word_ptr->get_next_set_bit(offset, &nbit)
+                                        : word_ptr->get_next_reset_bit(offset, &nbit);
+        if (found) { ret = start_bit + nbit - offset; }
+
+        if (ret == inval_bit) {
+            // test rest of whole words
+            bit_count_t current_bit = start_bit + (bitword_type::bits() - offset);
+            bit_count_t bits_remaining = (current_bit > size()) ? 0 : size() - current_bit;
+            while (bits_remaining > 0) {
+                ++word_ptr;
+                found =
+                    search_for_set_bit ? word_ptr->get_next_set_bit(0, &nbit) : word_ptr->get_next_reset_bit(0, &nbit);
+                if (found) {
+                    ret = current_bit + nbit;
+                    break;
+                }
+                current_bit += bitword_type::bits();
+                bits_remaining -= std::min< bit_count_t >(bits_remaining, bitword_type::bits());
+            }
+        }
+
+        if (ret >= size()) { ret = inval_bit; }
+        return ret;
+    }
+
+    /// @return One character per bit, produced word by word with Bitword::to_string()
+    std::string to_string() const {
+        std::string str;
+        str.reserve(size());
+        auto const num_words = size() / word_size_bits();
+        for (uint32_t i{0}; i < num_words; ++i) {
+            str += s_->words[i].to_string();
+        }
+        return str;
+    }
+
+private:
+    // Word containing the given bit, or nullptr if the bit is outside the bitset
+    bitword_type* get_word(bit_count_t bit) {
+        return (sisl_unlikely(bit >= nbits_)) ? nullptr : &s_->words[bit / word_size_bits()];
+    }
+
+    bitword_type const* get_word_const(bit_count_t bit) const {
+        return (sisl_unlikely(bit >= nbits_)) ? nullptr : &s_->words[bit / word_size_bits()];
+    }
+
+    bit_count_t get_word_index(bit_count_t bit) const {
+        DEBUG_ASSERT(s_, "compact bitset not initialized");
+        return bit / word_size_bits();
+    }
+
+    // Position of the given bit within its word
+    uint8_t get_word_offset(bit_count_t bit) const {
+        assert(s_);
+        return static_cast< uint8_t >(bit & word_mask());
+    }
+};
+} // namespace sisl
diff --git a/include/sisl/fds/utils.hpp b/include/sisl/fds/utils.hpp
index 0832a76e..6615103e 100644
--- a/include/sisl/fds/utils.hpp
+++ b/include/sisl/fds/utils.hpp
@@ -224,6 +224,7 @@ static int spaceship_oper(const T& left, const T& right) {
 
 #define uintptr_cast reinterpret_cast< uint8_t* >
 #define voidptr_cast reinterpret_cast< void* >
+#define c_voidptr_cast reinterpret_cast< const void* >
 #define charptr_cast reinterpret_cast< char* >
 #define c_charptr_cast reinterpret_cast< const char* >
 #define int_cast static_cast< int >
diff --git a/src/fds/CMakeLists.txt b/src/fds/CMakeLists.txt
index d0841fe5..b53fe5cd 100644
--- a/src/fds/CMakeLists.txt
+++ b/src/fds/CMakeLists.txt
@@ -38,6 +38,13 @@ if (DEFINED ENABLE_TESTING)
     target_link_libraries(test_bitword sisl ${COMMON_DEPS} GTest::gtest)
     add_test(NAME Bitword COMMAND test_bitset)
 
+    add_executable(test_compact_bitset)
+    target_sources(test_compact_bitset PRIVATE
+        tests/test_compact_bitset.cpp
+    )
+    target_link_libraries(test_compact_bitset sisl ${COMMON_DEPS} GTest::gtest)
+    add_test(NAME CompactBitset COMMAND test_compact_bitset)
+
     add_executable(obj_allocator_benchmark)
     target_sources(obj_allocator_benchmark PRIVATE
         tests/obj_allocator_benchmark.cpp
diff --git a/src/fds/tests/test_compact_bitset.cpp b/src/fds/tests/test_compact_bitset.cpp
new file mode 100644
index 00000000..7ce9cc95
--- /dev/null
+++ b/src/fds/tests/test_compact_bitset.cpp
@@ -0,0 +1,172 @@
+/*********************************************************************************
+ * Modifications Copyright 2017-2019 eBay Inc.
+ *
+ * Author/Developer(s): Harihara Kadayam
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *    https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ *********************************************************************************/
+#include <cstdint>
+#include <memory>
+#include <random>
+
+#include <boost/dynamic_bitset.hpp>
+#include <gtest/gtest.h>
+
+#include <sisl/logging/logging.h>
+#include <sisl/options/options.h>
+
+#include <sisl/fds/compact_bitset.hpp>
+
+using namespace sisl;
+
+SISL_OPTIONS_ENABLE(logging, test_compact_bitset)
+
+// Fixture: a CompactBitSet laid over a buffer of --buf_size bytes (rounded up to whole words), zeroed per test
+class CompactBitsetTest : public testing::Test {
+protected:
+    sisl::io_blob_safe m_buf;
+    std::unique_ptr< CompactBitSet > m_bset;
+
+public:
+    CompactBitsetTest() :
+            testing::Test(),
+            m_buf{uint32_cast(
+                sisl::round_up(SISL_OPTIONS["buf_size"].as< uint32_t >(), CompactBitSet::size_multiples()))} {}
+    CompactBitsetTest(const CompactBitsetTest&) = delete;
+    CompactBitsetTest(CompactBitsetTest&&) noexcept = delete;
+    CompactBitsetTest& operator=(const CompactBitsetTest&) = delete;
+    CompactBitsetTest& operator=(CompactBitsetTest&&) noexcept = delete;
+    virtual ~CompactBitsetTest() override = default;
+
+protected:
+    void SetUp() override { m_bset = std::make_unique< CompactBitSet >(m_buf, true); }
+    void TearDown() override {}
+};
+
+TEST_F(CompactBitsetTest, AlternateBits) {
+    ASSERT_EQ(m_bset->size(), m_buf.size * 8);
+
+    for (CompactBitSet::bit_count_t i{0}; i < m_bset->size(); ++i) {
+        ASSERT_EQ(m_bset->is_bit_set(i), false);
+    }
+
+    // Set alternate bits
+    for (CompactBitSet::bit_count_t i{0}; i < m_bset->size(); i += 2) {
+        m_bset->set_bit(i);
+    }
+
+    for (CompactBitSet::bit_count_t i{0}; i < m_bset->size(); ++i) {
+        ASSERT_EQ(m_bset->is_bit_set(i), (i % 2 == 0));
+    }
+
+    // Validate if next set or reset bit starting from itself returns itself back
+    for (CompactBitSet::bit_count_t i{0}; i < m_bset->size(); ++i) {
+        ASSERT_EQ(m_bset->get_next_set_or_reset_bit(i, ((i % 2) == 0)), i);
+    }
+
+    // Validate if next set or reset bit starting from previous returns next bit
+    for (CompactBitSet::bit_count_t i{1}; i < m_bset->size(); ++i) {
+        ASSERT_EQ(m_bset->get_next_set_or_reset_bit(i - 1, ((i % 2) == 0)), i);
+    }
+}
+
+TEST_F(CompactBitsetTest, AllBits) {
+    // Set all bits
+    for (CompactBitSet::bit_count_t i{0}; i < m_bset->size(); ++i) {
+        m_bset->set_bit(i);
+    }
+
+    for (CompactBitSet::bit_count_t i{0}; i < m_bset->size(); ++i) {
+        ASSERT_EQ(m_bset->is_bit_set(i), true);
+    }
+
+    for (CompactBitSet::bit_count_t i{0}; i < m_bset->size(); ++i) {
+        ASSERT_EQ(m_bset->get_next_set_bit(i), i);
+        ASSERT_EQ(m_bset->get_next_reset_bit(i), CompactBitSet::inval_bit);
+    }
+}
+
+TEST_F(CompactBitsetTest, PrevSetBitBoundaries) {
+    // Nothing is set yet
+    ASSERT_EQ(m_bset->get_prev_set_bit(m_bset->size() - 1), CompactBitSet::inval_bit);
+
+    m_bset->set_bit(0);
+    ASSERT_EQ(m_bset->get_prev_set_bit(0), 0u);
+    ASSERT_EQ(m_bset->get_prev_set_bit(m_bset->size() - 1), 0u);
+
+    // start_bit == size() is accepted and searches down from the last bit
+    ASSERT_EQ(m_bset->get_prev_set_bit(m_bset->size()), 0u);
+}
+
+TEST_F(CompactBitsetTest, RandomBitsWithReload) {
+    auto const num_bits = m_bset->size();
+    boost::dynamic_bitset<> shadow_bset{num_bits};
+
+    std::random_device rd;
+    std::mt19937 re(rd());
+    std::uniform_int_distribution< CompactBitSet::bit_count_t > bit_gen(0, num_bits - 1);
+    for (uint64_t i{0}; i < num_bits / 2; ++i) {
+        auto bit = bit_gen(re);
+        shadow_bset.set(bit);
+        m_bset->set_bit(s_cast< CompactBitSet::bit_count_t >(bit));
+    }
+
+    auto validate = [this, &shadow_bset]() {
+        CompactBitSet::bit_count_t prev_set_bit{CompactBitSet::inval_bit};
+        for (uint64_t i{0}; i < m_bset->size(); ++i) {
+            auto next_shadow_set_bit = (i == 0) ? shadow_bset.find_first() : shadow_bset.find_next(i - 1);
+            CompactBitSet::bit_count_t next_set_bit = m_bset->get_next_set_bit(i);
+            if (next_shadow_set_bit == boost::dynamic_bitset<>::npos) {
+                ASSERT_EQ(next_set_bit, CompactBitSet::inval_bit);
+            } else {
+                ASSERT_EQ(next_set_bit, next_shadow_set_bit);
+                if (next_set_bit == i) { prev_set_bit = i; }
+            }
+            // Checked for every i, including the bits that come after the last set bit
+            ASSERT_EQ(m_bset->get_prev_set_bit(i), prev_set_bit);
+        }
+
+        // Flip it so we can look for reset bits
+        shadow_bset = shadow_bset.flip();
+        for (uint64_t i{0}; i < m_bset->size(); ++i) {
+            auto next_shadow_reset_bit = (i == 0) ? shadow_bset.find_first() : shadow_bset.find_next(i - 1);
+            CompactBitSet::bit_count_t next_reset_bit = m_bset->get_next_reset_bit(i);
+            if (next_shadow_reset_bit == boost::dynamic_bitset<>::npos) {
+                ASSERT_EQ(next_reset_bit, CompactBitSet::inval_bit);
+            } else {
+                ASSERT_EQ(next_reset_bit, next_shadow_reset_bit);
+            }
+        }
+
+        // Flip it back to original
+        shadow_bset = shadow_bset.flip();
+    };
+
+    validate();
+    m_bset = std::make_unique< CompactBitSet >(m_buf, false); // Reload
+    validate();
+}
+
+SISL_OPTION_GROUP(test_compact_bitset,
+                  (buf_size, "", "buf_size", "buf_size that contains the bits",
+                   ::cxxopts::value< uint32_t >()->default_value("1024"), "number"))
+
+int main(int argc, char* argv[]) {
+    int parsed_argc{argc};
+    ::testing::InitGoogleTest(&parsed_argc, argv);
+    SISL_OPTIONS_LOAD(parsed_argc, argv, logging, test_compact_bitset);
+
+    sisl::logging::SetLogger("test_compact_bitset");
+    spdlog::set_pattern("[%D %T%z] [%^%l%$] [%n] [%t] %v");
+
+    return RUN_ALL_TESTS();
+}