Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CompactBitset utility to turn a bunch of bits into a bitset without additional serialization requirements #184

Merged
merged 3 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) # turn on folder hierarchies

include (cmake/Flags.cmake)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 20)

if(EXISTS ${CMAKE_BINARY_DIR}/conanbuildinfo.cmake)
include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake)
Expand Down
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class SISLConan(ConanFile):
name = "sisl"
version = "10.1.4"
version = "10.1.5"

homepage = "https://github.com/eBay/sisl"
description = "Library for fast data structures, utilities"
Expand Down
2 changes: 1 addition & 1 deletion include/sisl/fds/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ struct blob {
uint32_t size;

blob() : blob{nullptr, 0} {}
blob(uint8_t* const b, const uint32_t s) : bytes{b}, size{s} {}
blob(uint8_t* b, uint32_t s) : bytes{b}, size{s} {}
};

using sg_iovs_t = folly::small_vector< iovec, 4 >;
Expand Down
147 changes: 147 additions & 0 deletions include/sisl/fds/compact_bitset.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*********************************************************************************
* Modifications Copyright 2017-2019 eBay Inc.
*
* Author/Developer(s): Harihara Kadayam
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
*********************************************************************************/
#pragma once

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <string>
#include <utility>

#include <sisl/fds/bitword.hpp>
#include <sisl/fds/utils.hpp>
#include <sisl/fds/buffer.hpp>

namespace sisl {
/**
 * @brief Bitset stored as a bare array of 64-bit words with no header in front of it, so the backing
 * bytes can be persisted or reloaded as they are.
 *
 * An instance works in one of two modes:
 *  - owning: the size-based constructor allocates a zeroed buffer and the destructor frees it;
 *  - view:   the blob-based constructor operates in place on caller-provided memory. The caller keeps
 *            ownership and must keep that memory alive for as long as this object is used.
 *
 * Bit indices are bounds-checked against size(): an out-of-range read reports "not set" / inval_bit
 * and an out-of-range write is silently ignored.
 *
 * Copying is disabled because an owning instance frees its buffer in the destructor and an implicit
 * member-wise copy would free it twice. Instances can be moved.
 *
 * NOTE(review): the word type is built on unsafe_bits, which suggests no internal synchronization;
 * confirm before sharing one instance between threads.
 */
class CompactBitSet {
public:
    using bit_count_t = uint32_t;

private:
    using bitword_type = Bitword< unsafe_bits< uint64_t > >;

    // Layout of the underlying buffer: just the words. `words` is declared with one element but is
    // indexed up to nbits_ / word_size_bits() - 1 into the attached buffer.
    struct serialized {
        bitword_type words[1]{bitword_type{}};
    };

    bit_count_t nbits_{0};   // Number of addressable bits
    bool allocated_{false};  // true when s_ points to memory allocated (and to be freed) by this object
    serialized* s_{nullptr}; // Backing words, owned or borrowed depending on allocated_

private:
    static constexpr size_t word_size_bytes() { return sizeof(unsafe_bits< uint64_t >); }
    static constexpr size_t word_size_bits() { return word_size_bytes() * 8; }
    static constexpr uint64_t word_mask() { return bitword_type::bits() - 1; }

public:
    /// Sentinel returned by the search functions when no matching bit exists.
    static constexpr bit_count_t inval_bit = std::numeric_limits< bit_count_t >::max();

    /// Buffers handed to the blob-based constructor must have a size that is a multiple of this many bytes.
    static constexpr uint8_t size_multiples() { return word_size_bytes(); }

    /**
     * @brief Create an owning bitset with all bits reset.
     * @param nbits Requested number of bits (> 0); rounded up to a whole number of words, so size()
     * may be larger than nbits.
     */
    explicit CompactBitSet(bit_count_t nbits) {
        DEBUG_ASSERT_GT(nbits, 0, "compact bitset should have nbits > 0");
        nbits_ = s_cast< bit_count_t >(sisl::round_up(nbits, word_size_bits()));
        size_t const buf_size = nbits_ / 8;

        uint8_t* buf = new uint8_t[buf_size];
        std::memset(buf, 0, buf_size);
        s_ = r_cast< serialized* >(buf);
        allocated_ = true;
    }

    /**
     * @brief Attach the bitset to an existing buffer without taking ownership of it.
     * @param buf Buffer holding the bits; its size must be non-zero and a multiple of size_multiples().
     * @param init_bits When true the buffer is zeroed (fresh bitset); when false the existing content
     * is used as is (reload of previously written bits).
     */
    CompactBitSet(sisl::blob const& buf, bool init_bits) : s_{r_cast< serialized* >(buf.bytes)} {
        DEBUG_ASSERT_GT(buf.size, 0, "compact bitset initialized with empty buffer");
        DEBUG_ASSERT_EQ(buf.size % word_size_bytes(), 0, "compact bitset buffer size must be multiple of word size");
        nbits_ = buf.size * 8;
        if (init_bits) { std::memset(buf.bytes, 0, buf.size); }
    }

    // Non-copyable: see class comment (double free of an owned buffer).
    CompactBitSet(CompactBitSet const&) = delete;
    CompactBitSet& operator=(CompactBitSet const&) = delete;

    /// Move constructor: takes over the buffer (and its ownership, if any) and leaves `other` empty.
    CompactBitSet(CompactBitSet&& other) noexcept :
            nbits_{std::exchange(other.nbits_, 0)},
            allocated_{std::exchange(other.allocated_, false)},
            s_{std::exchange(other.s_, nullptr)} {}

    /// Move assignment: releases any buffer owned by this object, then takes over the one from `other`.
    CompactBitSet& operator=(CompactBitSet&& other) noexcept {
        if (this != &other) {
            if (allocated_) { delete[] uintptr_cast(s_); }
            nbits_ = std::exchange(other.nbits_, 0);
            allocated_ = std::exchange(other.allocated_, false);
            s_ = std::exchange(other.s_, nullptr);
        }
        return *this;
    }

    ~CompactBitSet() {
        // Only free what the size-based constructor allocated; borrowed buffers belong to the caller.
        if (allocated_) { delete[] uintptr_cast(s_); }
    }

    /// @return Number of bits addressable in this bitset.
    bit_count_t size() const { return nbits_; }
    void set_bit(bit_count_t start) { set_reset_bit(start, true); }
    void reset_bit(bit_count_t start) { set_reset_bit(start, false); }

    /// @return true if `bit` is within range and set, false otherwise.
    bool is_bit_set(bit_count_t bit) const {
        bitword_type const* word_ptr = get_word_const(bit);
        if (!word_ptr) { return false; }
        uint8_t const offset = get_word_offset(bit);
        return word_ptr->is_bit_set_reset(offset, true);
    }

    bit_count_t get_next_set_bit(bit_count_t start_bit) const { return get_next_set_or_reset_bit(start_bit, true); }
    bit_count_t get_next_reset_bit(bit_count_t start_bit) const { return get_next_set_or_reset_bit(start_bit, false); }

    /// Set (value == true) or reset (value == false) a single bit; out-of-range bits are ignored.
    void set_reset_bit(bit_count_t bit, bool value) {
        bitword_type* word_ptr = get_word(bit);
        if (!word_ptr) { return; }
        uint8_t const offset = get_word_offset(bit);
        word_ptr->set_reset_bits(offset, 1, value);
    }

    /**
     * @brief Find the first bit at or after `start_bit` that is set (or reset).
     * @param start_bit Bit to start searching from, inclusive.
     * @param search_for_set_bit true to look for a set bit, false to look for a reset bit.
     * @return Index of the matching bit, or inval_bit if start_bit is out of range or nothing matches.
     */
    bit_count_t get_next_set_or_reset_bit(bit_count_t start_bit, bool search_for_set_bit) const {
        bit_count_t ret{inval_bit};

        // check first word which may be partial
        uint8_t const offset = get_word_offset(start_bit);
        bitword_type const* word_ptr = get_word_const(start_bit);
        if (!word_ptr) { return ret; }

        uint8_t nbit{0};
        bool found = search_for_set_bit ? word_ptr->get_next_set_bit(offset, &nbit)
                                        : word_ptr->get_next_reset_bit(offset, &nbit);
        // (start_bit - offset) is the index of the first bit of this word; nbit is used as a
        // position within the word (same convention as the whole-word loop below).
        if (found) { ret = start_bit + nbit - offset; }

        if (ret == inval_bit) {
            // test rest of whole words
            bit_count_t current_bit = start_bit + (bitword_type::bits() - offset);
            bit_count_t bits_remaining = (current_bit > size()) ? 0 : size() - current_bit;
            while (bits_remaining > 0) {
                ++word_ptr;
                found =
                    search_for_set_bit ? word_ptr->get_next_set_bit(0, &nbit) : word_ptr->get_next_reset_bit(0, &nbit);
                if (found) {
                    ret = current_bit + nbit;
                    break;
                }
                current_bit += bitword_type::bits();
                bits_remaining -= std::min< bit_count_t >(bits_remaining, bitword_type::bits());
            }
        }

        if (ret >= size()) { ret = inval_bit; }
        return ret;
    }

    /// @return Concatenation of the to_string() output of every word, in word order.
    std::string to_string() const {
        std::string str;
        // size() is in bits, so the number of words is bits / bits-per-word. Dividing by the word
        // size in bytes (as done previously) walked 8x past the end of the buffer.
        auto const num_words = size() / word_size_bits();
        for (uint32_t i{0}; i < num_words; ++i) {
            fmt::format_to(std::back_inserter(str), "{}", s_->words[i].to_string());
        }
        return str;
    }

private:
    // Word containing `bit`, or nullptr when `bit` is out of range.
    bitword_type* get_word(bit_count_t bit) {
        return (sisl_unlikely(bit >= nbits_)) ? nullptr : &s_->words[bit / word_size_bits()];
    }

    // Const flavour of get_word().
    bitword_type const* get_word_const(bit_count_t bit) const {
        return (sisl_unlikely(bit >= nbits_)) ? nullptr : &s_->words[bit / word_size_bits()];
    }

    // Position of `bit` inside its word.
    uint8_t get_word_offset(bit_count_t bit) const {
        assert(s_);
        return static_cast< uint8_t >(bit & word_mask());
    }
};
} // namespace sisl
1 change: 1 addition & 0 deletions include/sisl/fds/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ static int spaceship_oper(const T& left, const T& right) {

#define uintptr_cast reinterpret_cast< uint8_t* >
#define voidptr_cast reinterpret_cast< void* >
#define c_voidptr_cast reinterpret_cast< const void* >
#define charptr_cast reinterpret_cast< char* >
#define c_charptr_cast reinterpret_cast< const char* >
#define int_cast static_cast< int >
Expand Down
7 changes: 7 additions & 0 deletions src/fds/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ if (DEFINED ENABLE_TESTING)
target_link_libraries(test_bitword sisl ${COMMON_DEPS} GTest::gtest)
# Run the bitword test binary built just above (this previously pointed at test_bitset, so the
# Bitword test never executed its own executable).
add_test(NAME Bitword COMMAND test_bitword)

# Unit test for CompactBitSet: build the executable from its single source file, link it against
# sisl and GTest, and register it with CTest under the name "CompactBitset".
add_executable(test_compact_bitset tests/test_compact_bitset.cpp)
target_link_libraries(test_compact_bitset sisl ${COMMON_DEPS} GTest::gtest)
add_test(NAME CompactBitset COMMAND test_compact_bitset)

add_executable(obj_allocator_benchmark)
target_sources(obj_allocator_benchmark PRIVATE
tests/obj_allocator_benchmark.cpp
Expand Down
155 changes: 155 additions & 0 deletions src/fds/tests/test_compact_bitset.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*********************************************************************************
* Modifications Copyright 2017-2019 eBay Inc.
*
* Author/Developer(s): Harihara Kadayam
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
*********************************************************************************/
#include <cstdint>
#include <iostream>
#include <boost/dynamic_bitset.hpp>
#include <random>

#include <sisl/logging/logging.h>
#include <sisl/options/options.h>

#include <gtest/gtest.h>

#include <sisl/fds/compact_bitset.hpp>

// Bring the sisl namespace into scope so CompactBitSet can be used unqualified throughout this test.
using namespace sisl;

// NOTE(review): presumably makes the `logging` and `test_compact_bitset` option groups available to
// this binary (they are the same groups passed to SISL_OPTIONS_LOAD in main) — confirm against sisl/options.
SISL_OPTIONS_ENABLE(logging, test_compact_bitset)

class CompactBitsetTest : public testing::Test {
protected:
sisl::io_blob_safe m_buf;
std::unique_ptr< CompactBitSet > m_bset;

public:
CompactBitsetTest() :
testing::Test(),
m_buf{uint32_cast(
sisl::round_up(SISL_OPTIONS["buf_size"].as< uint32_t >(), CompactBitSet::size_multiples()))} {}
CompactBitsetTest(const CompactBitsetTest&) = delete;
CompactBitsetTest(CompactBitsetTest&&) noexcept = delete;
CompactBitsetTest& operator=(const CompactBitsetTest&) = delete;
CompactBitsetTest& operator=(CompactBitsetTest&&) noexcept = delete;
virtual ~CompactBitsetTest() override = default;

protected:
void SetUp() override { m_bset = std::make_unique< CompactBitSet >(m_buf, true); }
void TearDown() override {}
};

// Sets every even bit and verifies lookups and next-set / next-reset searches around that pattern.
TEST_F(CompactBitsetTest, AlternateBits) {
    using bit_t = CompactBitSet::bit_count_t;

    ASSERT_EQ(m_bset->size(), m_buf.size * 8);

    // A freshly initialized bitset has every bit cleared
    for (bit_t b = 0; b < m_bset->size(); ++b) {
        ASSERT_FALSE(m_bset->is_bit_set(b));
    }

    // Turn on the even bits only
    for (bit_t b = 0; b < m_bset->size(); b += 2) {
        m_bset->set_bit(b);
    }

    for (bit_t b = 0; b < m_bset->size(); ++b) {
        bool const expect_set = ((b % 2) == 0);
        ASSERT_EQ(m_bset->is_bit_set(b), expect_set);
    }

    // Searching from a bit for its own state must return that very bit
    for (bit_t b = 0; b < m_bset->size(); ++b) {
        bool const is_even = ((b % 2) == 0);
        ASSERT_EQ(m_bset->get_next_set_or_reset_bit(b, is_even), b);
    }

    // Searching from the previous bit for this bit's state must land on this bit
    for (bit_t b = 1; b < m_bset->size(); ++b) {
        bool const is_even = ((b % 2) == 0);
        ASSERT_EQ(m_bset->get_next_set_or_reset_bit(b - 1, is_even), b);
    }
}

// Sets every bit and verifies that all bits read back as set and that no reset bit can be found.
TEST_F(CompactBitsetTest, AllBits) {
    using bit_t = CompactBitSet::bit_count_t;

    // Turn on the whole bitset
    for (bit_t b = 0; b < m_bset->size(); ++b) {
        m_bset->set_bit(b);
    }

    for (bit_t b = 0; b < m_bset->size(); ++b) {
        ASSERT_TRUE(m_bset->is_bit_set(b));
    }

    // From any position the next set bit is the position itself and there is no reset bit left
    for (bit_t b = 0; b < m_bset->size(); ++b) {
        ASSERT_EQ(m_bset->get_next_set_bit(b), b);
        ASSERT_EQ(m_bset->get_next_reset_bit(b), CompactBitSet::inval_bit);
    }
}

// Sets random bits, mirrors them in a boost::dynamic_bitset, and checks that next-set / next-reset
// searches agree with the mirror, both before and after re-attaching a bitset to the same buffer.
TEST_F(CompactBitsetTest, RandomBitsWithReload) {
    using bit_t = CompactBitSet::bit_count_t;

    auto const num_bits = m_bset->size();
    boost::dynamic_bitset<> shadow{num_bits};

    std::random_device seed_src;
    std::mt19937 rng(seed_src());
    std::uniform_int_distribution< bit_t > pick_bit(0, num_bits - 1);

    // Populate both bitsets with the same randomly chosen bits
    uint64_t const num_picks = num_bits / 2;
    for (uint64_t n = 0; n < num_picks; ++n) {
        bit_t const chosen = pick_bit(rng);
        shadow.set(chosen);
        m_bset->set_bit(chosen);
    }

    auto validate = [&]() {
        auto const npos = boost::dynamic_bitset<>::npos;

        // Next set bit must match the shadow's next set bit
        for (uint64_t pos = 0; pos < m_bset->size(); ++pos) {
            auto const expected = (pos == 0) ? shadow.find_first() : shadow.find_next(pos - 1);
            bit_t const actual = m_bset->get_next_set_bit(pos);
            if (expected == npos) {
                ASSERT_EQ(actual, CompactBitSet::inval_bit);
            } else {
                ASSERT_EQ(actual, expected);
            }
        }

        // Invert the shadow so that its set bits mark the reset bits of the bitset under test
        shadow.flip();
        for (uint64_t pos = 0; pos < m_bset->size(); ++pos) {
            auto const expected = (pos == 0) ? shadow.find_first() : shadow.find_next(pos - 1);
            bit_t const actual = m_bset->get_next_reset_bit(pos);
            if (expected == npos) {
                ASSERT_EQ(actual, CompactBitSet::inval_bit);
            } else {
                ASSERT_EQ(actual, expected);
            }
        }

        // Restore the shadow to its original content
        shadow.flip();
    };

    validate();
    // Reload: attach a new bitset to the same buffer without clearing it, then validate again
    m_bset = std::make_unique< CompactBitSet >(m_buf, false);
    validate();
}

// Options of the `test_compact_bitset` group: `buf_size` (uint32_t, default "1024"), which the test
// fixture reads to size the buffer that contains the bits.
SISL_OPTION_GROUP(test_compact_bitset,
(buf_size, "", "buf_size", "buf_size that contains the bits",
::cxxopts::value< uint32_t >()->default_value("1024"), "number"))

// Test entry point: initializes GoogleTest, loads the sisl options, sets up logging and runs all tests.
int main(int argc, char* argv[]) {
    // GoogleTest gets a copy of argc it may update; the sisl option loader then works from that copy.
    int remaining_argc = argc;
    ::testing::InitGoogleTest(&remaining_argc, argv);
    SISL_OPTIONS_LOAD(remaining_argc, argv, logging, test_compact_bitset);

    sisl::logging::SetLogger("test_compact_bitset");
    spdlog::set_pattern("[%D %T%z] [%^%l%$] [%n] [%t] %v");

    int const test_result = RUN_ALL_TESTS();
    return test_result;
}