Skip to content

Commit

Permalink
Add method internal_memory_builder_partitioned_phf::build_from_hashes
Browse files Browse the repository at this point in the history
This mirrors internal_memory_builder_single_phf::build_from_hashes
  • Loading branch information
progval committed Apr 30, 2024
1 parent 28aedcb commit f3b3f4d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 29 deletions.
18 changes: 14 additions & 4 deletions include/builders/internal_memory_builder_partitioned_phf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@ struct internal_memory_builder_partitioned_phf {
// Entry point for building from raw keys. A private copy of the configuration
// is made; if its seed equals constants::invalid_seed it is replaced by the
// result of random_value(). The keys are then wrapped in a hash_generator
// (which applies hasher_type with that seed on dereference) and the work is
// delegated to build_from_hashes().
template <typename Iterator>
build_timings build_from_keys(Iterator keys, uint64_t num_keys,
                              build_configuration const& config) {
    using generator_type = hash_generator<Iterator, hasher_type>;

    // The caller's configuration is const: patch the seed on a local copy.
    build_configuration resolved = config;
    if (config.seed == constants::invalid_seed) { resolved.seed = random_value(); }

    return build_from_hashes(generator_type(keys, resolved.seed), num_keys, resolved);
}

template <typename Iterator>
build_timings build_from_hashes(Iterator hashes, uint64_t num_keys,
build_configuration const& config) {
assert(num_keys > 1);
util::check_hash_collision_probability<Hasher>(num_keys);

Expand All @@ -25,7 +36,7 @@ struct internal_memory_builder_partitioned_phf {
uint64_t num_partitions = config.num_partitions;
if (config.verbose_output) std::cout << "num_partitions " << num_partitions << std::endl;

m_seed = config.seed == constants::invalid_seed ? random_value() : config.seed;
m_seed = config.seed;
m_num_keys = num_keys;
m_table_size = 0;
m_num_partitions = num_partitions;
Expand All @@ -41,9 +52,8 @@ struct internal_memory_builder_partitioned_phf {
for (auto& partition : partitions) partition.reserve(1.5 * average_partition_size);

progress_logger logger(num_keys, " == partitioned ", " keys", config.verbose_output);
for (uint64_t i = 0; i != num_keys; ++i, ++keys) {
auto const& key = *keys;
auto hash = hasher_type::hash(key, m_seed);
for (uint64_t i = 0; i != num_keys; ++i, ++hashes) {
auto hash = *hashes;
auto b = m_bucketer.bucket(hash.mix());
partitions[b].push_back(hash);
logger.log();
Expand Down
27 changes: 3 additions & 24 deletions include/builders/internal_memory_builder_single_phf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct internal_memory_builder_single_phf {
for (auto attempt = 0; attempt < 10; ++attempt) {
m_seed = random_value();
try {
return build_from_hashes(hash_generator<RandomAccessIterator>(keys, m_seed),
return build_from_hashes(hash_generator<RandomAccessIterator, hasher_type>(keys, m_seed),
num_keys, config);
} catch (seed_runtime_error const& error) {
std::cout << "attempt " << attempt + 1 << " failed" << std::endl;
Expand All @@ -28,8 +28,8 @@ struct internal_memory_builder_single_phf {
throw seed_runtime_error();
}
m_seed = config.seed;
return build_from_hashes(hash_generator<RandomAccessIterator>(keys, m_seed), num_keys,
config);
return build_from_hashes(hash_generator<RandomAccessIterator, hasher_type>(keys, m_seed),
num_keys, config);
}

template <typename RandomAccessIterator>
Expand Down Expand Up @@ -205,27 +205,6 @@ struct internal_memory_builder_single_phf {
std::vector<uint64_t> m_pilots;
std::vector<uint64_t> m_free_slots;

// Adapter that walks a sequence of keys and yields, on dereference, the hash
// of the current key as computed by the enclosing builder's hasher_type.
template <typename RandomAccessIterator>
struct hash_generator {
    hash_generator(RandomAccessIterator keys, uint64_t seed)
        : m_cursor(keys), m_hash_seed(seed) {}

    // Hash of the key currently pointed to, using the stored seed.
    auto operator*() { return hasher_type::hash(*m_cursor, m_hash_seed); }

    // Step to the next key.
    void operator++() { ++m_cursor; }

    // New generator positioned `offset` keys further on, sharing the same
    // seed; this object is left unchanged.
    hash_generator operator+(uint64_t offset) const {
        return hash_generator(m_cursor + offset, m_hash_seed);
    }

private:
    RandomAccessIterator m_cursor;
    uint64_t m_hash_seed;
};

typedef std::vector<bucket_payload_pair> pairs_t;

struct buckets_iterator_t {
Expand Down
23 changes: 22 additions & 1 deletion include/builders/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,4 +286,25 @@ void fill_free_slots(bit_vector_builder const& taken, uint64_t num_keys, FreeSlo
assert(next_used_slot == table_size);
}

} // namespace pthash
// Adapter that walks a sequence of keys and yields, on dereference, the value
// of Hasher::hash(key, seed) for the current key. It exposes only the
// operations defined below: dereference, pre-increment and `+ offset`.
template <typename RandomAccessIterator, typename Hasher>
struct hash_generator {
    hash_generator(RandomAccessIterator keys, uint64_t seed)
        : m_cursor(keys), m_hash_seed(seed) {}

    // Hash of the key currently pointed to, using the stored seed.
    auto operator*() { return Hasher::hash(*m_cursor, m_hash_seed); }

    // Step to the next key.
    void operator++() { ++m_cursor; }

    // New generator positioned `offset` keys further on, sharing the same
    // seed; this object is left unchanged.
    hash_generator operator+(uint64_t offset) const {
        return hash_generator(m_cursor + offset, m_hash_seed);
    }

private:
    RandomAccessIterator m_cursor;
    uint64_t m_hash_seed;
};

} // namespace pthash

0 comments on commit f3b3f4d

Please sign in to comment.