From 5e8ddca960b8ac8eece3ac1166d714967e2d75a1 Mon Sep 17 00:00:00 2001 From: Piotr Beling Date: Thu, 3 Oct 2024 10:41:27 +0200 Subject: [PATCH] refactoring --- csf/src/fp/collision_solver.rs | 5 +++++ csf/src/fp/kvset.rs | 13 +++++++++++++ csf/src/fp/map/conf.rs | 1 - csf/src/fp/map/mod.rs | 32 ++++++++++++-------------------- 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/csf/src/fp/collision_solver.rs b/csf/src/fp/collision_solver.rs index 1fc6e08..b138117 100644 --- a/csf/src/fp/collision_solver.rs +++ b/csf/src/fp/collision_solver.rs @@ -34,6 +34,11 @@ pub trait CollisionSolver { /// Try to assign `value` (of size `bits_per_value`) to the given `index` which is not under collision. fn add_value(&mut self, index: usize, value: u8, bits_per_value: u8); + /// If the index is not under collision then try to assign `value` (of size `bits_per_value`) to it. + #[inline] fn process_value(&mut self, index: usize, value: u8, bits_per_value: u8) { + if !self.is_under_collision(index) { self.add_value(index, value, bits_per_value); } + } + /// Array that shows indices which have assigned values and are not under collision. fn to_collision_array(self) -> Box<[u64]>; diff --git a/csf/src/fp/kvset.rs b/csf/src/fp/kvset.rs index fec74fe..7166f59 100644 --- a/csf/src/fp/kvset.rs +++ b/csf/src/fp/kvset.rs @@ -1,5 +1,7 @@ use crate::bits_to_store_any_of_ref; +use super::CollisionSolver; + /// Moves all non-zeros to the begging of `values` and returns their number. pub fn remove_zeros(values: &mut [usize]) -> usize { let mut new_len: usize = 0usize; @@ -23,6 +25,17 @@ pub trait KVSet { /// If `self` doesn't remember which keys are retained it uses `retained_hint` to check this. fn for_each_key_value(&self, f: F/*, retained_hint: P*/) where F: FnMut(&K, u8)/*, P: FnMut(&K) -> bool*/; + /// Call `collision_solver.process_value(key_to_index(key), value, self.bits_per_value())` for each `key`-`value` pair. 
+ #[inline] + fn process_all_values(&self, mut key_to_index: I, collision_solver: &mut CS) + where I: FnMut(&K) -> usize, CS: CollisionSolver + { + let bits_per_value = self.bits_per_value(); + self.for_each_key_value(|key, value| { + collision_solver.process_value(key_to_index(key), value, bits_per_value); + }); + } + /// Returns minimal number of bits that can store any value. fn bits_per_value(&self) -> u8; diff --git a/csf/src/fp/map/conf.rs b/csf/src/fp/map/conf.rs index 0a721bb..5b18ef0 100644 --- a/csf/src/fp/map/conf.rs +++ b/csf/src/fp/map/conf.rs @@ -9,7 +9,6 @@ pub struct MapConf< LSC = OptimalLevelSize, CSB: CollisionSolverBuilder = LoMemAcceptEquals, S: BuildSeededHasher = BuildDefaultSeededHasher - /*, BS: stats::BuildStatsCollector = ()*/ > { /// Choose the size of each level. pub level_sizer: LSC, diff --git a/csf/src/fp/map/mod.rs b/csf/src/fp/map/mod.rs index 1f43aab..24b4560 100644 --- a/csf/src/fp/map/mod.rs +++ b/csf/src/fp/map/mod.rs @@ -24,7 +24,7 @@ pub struct Map { values: Box<[u64]>, // BitVec bits_per_value: u8, level_sizes: Box<[u64]>, - hash_builder: S + hash: S } impl GetSize for Map { @@ -37,11 +37,12 @@ impl GetSize for Map { const USES_DYN_MEM: bool = true; } -impl Map { +#[inline] +fn index(hash: &H, k: &K, level_nr: u32, level_size: usize) -> usize { + ph::utils::map64_to_64(hash.hash_one(k, level_nr), level_size as u64) as usize +} - #[inline(always)] fn index(&self, k: &K, level_nr: u32, size: usize) -> usize { - utils::map64_to_64(self.hash_builder.hash_one(k, level_nr), size as u64) as usize - } +impl Map { /// Gets the value associated with the given key k and reports statistics to access_stats. pub fn get_stats(&self, k: &K, access_stats: &mut A) -> Option { @@ -49,7 +50,7 @@ impl Map { let mut level = 0u32; loop { let level_size = (*self.level_sizes.get(level as usize)? 
as usize) << 6usize; - let i = array_begin_index + self.index(k, level, level_size); + let i = array_begin_index + index(&self.hash, k, level, level_size); if self.array.content.get_bit(i) { access_stats.found_on_level(level); return Some(self.values.get_fragment(self.array.rank(i), self.bits_per_value) as u8); @@ -90,19 +91,10 @@ impl Map { let level_size_segments = conf.level_sizer.size_segments(&kv); let level_size = level_size_segments * 64; stats.level(input_size, level_size); - let mut collision_solver = conf.collision_solver.new(level_size_segments, bits_per_value); - kv.for_each_key_value(|k, v| { // TODO ?? move this code to kv method - let a_index = utils::map64_to_64(conf.hash.hash_one(k, level_nr), level_size as u64) as usize; - if !collision_solver.is_under_collision(a_index) { - collision_solver.add_value(a_index, v, bits_per_value); - } - }); + let mut collision_solver: ::CollisionSolver = conf.collision_solver.new(level_size_segments, bits_per_value); + kv.process_all_values(|k| index(&conf.hash, k, level_nr, level_size), &mut collision_solver); let (current_array, current_values, current_values_len) = collision_solver.to_collision_and_values(bits_per_value); - kv.retain_keys(|k| { - !current_array.get_bit( - utils::map64_to_64(conf.hash.hash_one(k, level_nr), level_size as u64) as usize - ) - }); + kv.retain_keys(|k| !current_array.get_bit(index(&conf.hash, k, level_nr, level_size))); arrays.push(current_array); level_sizes.push(level_size_segments as u64); values.push(current_values); @@ -117,7 +109,7 @@ impl Map { values: concatenate_values(&values, &values_lens, bits_per_value), bits_per_value, level_sizes: level_sizes.into_boxed_slice(), - hash_builder: conf.hash + hash: conf.hash } } @@ -240,7 +232,7 @@ impl Map { values, bits_per_value, level_sizes, - hash_builder: hasher + hash: hasher }) } }