Skip to content

Commit

Permalink
work on GOMap
Browse files Browse the repository at this point in the history
  • Loading branch information
beling committed Oct 8, 2024
1 parent abe9430 commit 346c2f7
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 16 deletions.
4 changes: 2 additions & 2 deletions csf/src/fp/gocmap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ impl<C: Coding, GS: GroupSize, SS: SeedSize, S: BuildSeededHasher> GOCMap<C, GS,
let index = conf.goconf.bits_per_group.bit_index_for_seed(hash, group_seed(group), group);
if collision_solver.is_under_collision(index) { continue }
collision_solver.add_value(index,
coding.rev_fragment_of(values[i], value_rev_indices[i]),
bits_per_fragment);
coding.rev_fragment_of(values[i], value_rev_indices[i]),
bits_per_fragment);
}
}

Expand Down
18 changes: 9 additions & 9 deletions csf/src/fp/gomap/conf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ pub struct GOMapConf<
/// Configuration of family of (group-optimized) hash functions (default: [`GOConf::default`]).
pub goconf: GOConf<GS, SS, S>,
/// Chooses the size of each level.
pub level_size_chooser: LSC,
pub level_sizer: LSC,
/// Constructs collision solver that decides which collisions are positive, and which are negative.
pub collision_solver: CSB,
}

impl Default for GOMapConf {
fn default() -> Self { Self {
goconf: Default::default(),
level_size_chooser: Default::default(),
level_sizer: Default::default(),
collision_solver: Default::default(),
} }
}
Expand All @@ -32,7 +32,7 @@ impl<CSB: CollisionSolverBuilder> GOMapConf<OptimalLevelSize, CSB, TwoToPowerBit
pub fn cs(collision_solver: CSB) -> Self {
Self {
goconf: Default::default(),
level_size_chooser: Default::default(),
level_sizer: Default::default(),
collision_solver,
}
}
Expand All @@ -42,7 +42,7 @@ impl<GS: GroupSize, SS: SeedSize, S> GOMapConf<OptimalLevelSize, LoMemAcceptEqua
pub fn groups(goconf: GOConf<GS, SS, S>) -> Self {
Self {
goconf,
level_size_chooser: Default::default(),
level_sizer: Default::default(),
collision_solver: Default::default(),
}
}
Expand All @@ -52,7 +52,7 @@ impl<CSB: CollisionSolverBuilder, GS: GroupSize, SS: SeedSize, S> GOMapConf<Opti
pub fn groups_cs(goconf: GOConf<GS, SS, S>, collision_solver: CSB) -> Self {
Self {
goconf,
level_size_chooser: Default::default(),
level_sizer: Default::default(),
collision_solver,
}
}
Expand All @@ -68,7 +68,7 @@ impl<LSC> GOMapConf<LSC, LoMemAcceptEquals, TwoToPowerBitsStatic::<4>, TwoToPowe
pub fn lsize(level_size_chooser: LSC) -> Self {
Self {
goconf: Default::default(),
level_size_chooser,
level_sizer: level_size_chooser,
collision_solver: Default::default(),
}
}
Expand All @@ -78,20 +78,20 @@ impl<LSC, CSB: CollisionSolverBuilder> GOMapConf<LSC, CSB, TwoToPowerBitsStatic:
pub fn lsize_cs(level_size_chooser: LSC, collision_solver: CSB) -> Self {
Self {
goconf: Default::default(),
level_size_chooser,
level_sizer: level_size_chooser,
collision_solver,
}
}
}

impl<LSC, GS: GroupSize, SS: SeedSize, S> GOMapConf<LSC, LoMemAcceptEquals, GS, SS, S> {
pub fn groups_lsize(goconf: GOConf<GS, SS, S>, level_size_chooser: LSC) -> Self {
Self { goconf, level_size_chooser, collision_solver: Default::default() }
Self { goconf, level_sizer: level_size_chooser, collision_solver: Default::default() }
}
}

impl<LSC, CSB: CollisionSolverBuilder, GS: GroupSize, SS: SeedSize, S> GOMapConf<LSC, CSB, GS, SS, S> {
pub fn groups_lsize_cs(goconf: GOConf<GS, SS, S>, level_size_chooser: LSC, collision_solver: CSB) -> Self {
Self { goconf, level_size_chooser, collision_solver }
Self { goconf, level_sizer: level_size_chooser, collision_solver }
}
}
39 changes: 39 additions & 0 deletions csf/src/fp/gomap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ use std::hash::Hash;
mod conf;
pub use conf::GOMapConf;

use crate::fp::CollisionSolver;

use super::kvset::KVSet;
use super::{CollisionSolverBuilder, LevelSizer};

/// Fingerprinting-based compressed static function (immutable map)
/// that uses group optimization and maps hashable keys to unsigned integer values of a given bit-size.
///
Expand Down Expand Up @@ -82,4 +87,38 @@ impl<GS: GroupSize, SS: SeedSize, S: BuildSeededHasher> GOMap<GS, SS, S> {
self.get_stats_or_panic(key, &mut ())
}



/// Intended to construct `Self` from the key-value set `kv`, using the configuration `conf`
/// and reporting the size of each processed level to `stats`.
///
/// NOTE(review): this is work in progress. The function ends in `todo!()`, so it currently
/// never returns a value, and several locals declared below are not used yet.
pub fn with_conf_stats<K, KV, LSC, CSB, BS>(kv: KV, conf: GOMapConf<LSC, CSB, GS, SS, S>, stats: &mut BS) -> Self
where K: Hash, KV: KVSet<K>, LSC: LevelSizer, CSB: CollisionSolverBuilder, BS: stats::BuildStatsCollector
{
let bits_per_value = kv.bits_per_value();
// Accumulators, presumably for the per-level results; they are declared without `mut`
// and nothing is pushed to them yet.
let level_sizes = Vec::<usize>::new();
let arrays = Vec::<Box<[u64]>>::new();
let values_lens = Vec::<usize>::new();
let values = Vec::<Box<[u64]>>::new();
let groups = Vec::<Box<[u64]>>::new();
let mut input_size = kv.kv_len();
let mut level_nr = 0;
// NOTE(review): neither `input_size` nor `level_nr` is updated inside the loop body,
// so as written the loop does not terminate for a non-empty `kv`.
while input_size != 0 {
// Size of the current level in groups and in segments, derived from the size proposed by
// `conf.level_sizer` (times 64 — presumably the number of bits per 64-bit segment; TODO confirm).
let (level_size_groups, level_size_segments) = conf.goconf.bits_per_group
.level_size_groups_segments(conf.level_sizer.size_segments(&kv) * 64);
stats.level(input_size, level_size_segments * 64);

// Create a collision solver for this level and feed it all values of `kv`,
// indexing each key with a seed function that returns 0 for every argument (`|_| 0`).
let mut collision_solver: <CSB as CollisionSolverBuilder>::CollisionSolver = conf.collision_solver.new(level_size_segments, bits_per_value);
kv.process_all_values(|key| conf.goconf.key_index(key, level_nr, level_size_groups as u64,
|_| 0), &mut collision_solver);
let collisions = collision_solver.to_collision_array();
// One counter per group: counts the keys whose bit index (computed with seed 0) is set in `collisions`.
let mut best_counts = vec![0u32; level_size_groups].into_boxed_slice();
kv.for_each_key(|key| {
let hash = conf.goconf.hash_builder.hash_one(key, level_nr);
let group = group_nr(hash, level_size_groups as u64);
let bit_nr = conf.goconf.bits_per_group.bit_index_for_seed(hash, 0, group);
if collisions.get_bit(bit_nr) { best_counts[group as usize] += 1; }
});
// Zero-initialized seed vector with one entry per group; not used further yet.
let mut best_seeds = conf.goconf.bits_per_seed.new_zeroed_seed_vec(level_size_groups);

}
// The rest of the construction is not implemented yet.
todo!()
}
}
8 changes: 8 additions & 0 deletions csf/src/fp/kvset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ pub trait KVSet<K> {
/// If `self` doesn't remember which keys are retained it uses `retained_hint` to check this.
fn for_each_key_value<F>(&self, f: F/*, retained_hint: P*/) where F: FnMut(&K, u8)/*, P: FnMut(&K) -> bool*/;

/// Calls `f` once for every key in the set, using a single thread.
///
/// This is a convenience wrapper over `for_each_key_value`: each key is forwarded to `f`
/// and the value paired with it is ignored.
#[inline(always)]
fn for_each_key<F>(&self, mut f: F/*, retained_hint: P*/)
where F: FnMut(&K)/*, P: FnMut(&K) -> bool*/
{
    self.for_each_key_value(|key, _value| f(key));
}

/// Call `collision_solver.process_value(key_to_index(key), value, self.bits_per_value())` for each `key`-`value` pair.
#[inline]
fn process_all_values<I, CS>(&self, mut key_to_index: I, collision_solver: &mut CS)
Expand Down
6 changes: 1 addition & 5 deletions csf/src/fp/map/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,7 @@ impl<S: BuildSeededHasher> Map<S> {
bits_per_value: u8,
construct_partial: bool
) -> Arrays
where K: Hash,
KV: KVSet<K>,
LSC: LevelSizer,
CSB: CollisionSolverBuilder,
BS: stats::BuildStatsCollector
where K: Hash, KV: KVSet<K>, LSC: LevelSizer, CSB: CollisionSolverBuilder, BS: stats::BuildStatsCollector
{
let mut res = Arrays::default();
let mut input_size = kv.kv_len();
Expand Down

0 comments on commit 346c2f7

Please sign in to comment.