diff --git a/Cargo.lock b/Cargo.lock index 5d5bcdf..2bfa004 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,12 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "aliasable" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" - [[package]] name = "autocfg" version = "1.1.0" @@ -85,12 +79,6 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "either" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" - [[package]] name = "errno" version = "0.3.8" @@ -134,27 +122,11 @@ dependencies = [ "wasi", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "kairos-trie" version = "0.1.0" dependencies = [ "bumpalo", - "ouroboros", "proptest", "sha2", ] @@ -193,59 +165,12 @@ dependencies = [ "libm", ] -[[package]] -name = "ouroboros" -version = "0.18.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b7be5a8a3462b752f4be3ff2b2bf2f7f1d00834902e46be2a4d68b87b0573c" -dependencies = [ - "aliasable", - "ouroboros_macro", - "static_assertions", -] - -[[package]] -name = "ouroboros_macro" -version = "0.18.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b645dcde5f119c2c454a92d0dfa271a2a3b205da92e4292a68ead4bdbfde1f33" -dependencies = [ - "heck", - "itertools", - "proc-macro2", - "proc-macro2-diagnostics", - "quote", - "syn", -] - [[package]] name = "ppv-lite86" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" -[[package]] -name = "proc-macro2" -version = "1.0.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "proc-macro2-diagnostics" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "version_check", - "yansi", -] - [[package]] name = "proptest" version = "1.4.0" @@ -272,15 +197,6 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" -[[package]] -name = "quote" -version = "1.0.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" -dependencies = [ - "proc-macro2", -] - [[package]] name = "rand" version = "0.8.5" @@ -362,28 +278,11 @@ dependencies = [ "digest", ] -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "syn" -version = "2.0.52" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "tempfile" -version = "3.10.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", @@ -403,12 +302,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - [[package]] name = "version_check" version = "0.9.4" @@ -441,9 +334,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -456,48 +349,42 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" - -[[package]] -name = "yansi" -version = "1.0.0-rc.1" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1367295b8f788d371ce2dbc842c7b709c73ee1364d30351dd300ec2203b12377" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" diff --git a/Cargo.toml b/Cargo.toml index 36e8cd5..3d0a54c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,13 +4,18 @@ version = "0.1.0" edition = "2021" [features] -# default = ["std"] +default = ["std"] std = [] +[profile.test] +opt-level = 3 +debug-assertions = true +overflow-checks = true + + [dependencies] sha2 = "0.10" bumpalo = "3" -ouroboros = "0.18" [dev-dependencies] proptest = "1" diff --git a/src/lib.rs b/src/lib.rs index d7d6bb9..d7f0738 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ extern crate alloc; pub mod modified; pub mod stored; +use core::fmt::Debug; use std::{iter, mem}; use alloc::{boxed::Box, string::String, vec::Vec}; @@ -71,7 +72,7 @@ struct BranchMask { impl BranchMask { const fn new(word_idx: u32, a: u32, b: u32) -> Self { let diff = a ^ b; - let relative_bit_idx = diff.leading_zeros(); + let relative_bit_idx = diff.trailing_zeros(); let bit_idx = word_idx * 32 + relative_bit_idx; @@ -109,7 +110,9 @@ impl BranchMask { /// The index of the discriminant bit in the `left_prefix`. #[inline(always)] fn relative_bit_idx(&self) -> u32 { - self.bit_idx % 32 + let r = self.bit_idx % 32; + debug_assert!(r < 32); + r } #[inline(always)] @@ -120,7 +123,14 @@ impl BranchMask { /// A mask containing 1s in the prefix and discriminant bit. #[inline(always)] fn prefix_discriminant_mask(&self) -> u32 { - (1 << (self.relative_bit_idx() + 1)) - 1 + let relative_bit_idx = self.relative_bit_idx(); + if relative_bit_idx == 31 { + u32::MAX + } else { + let r = (1 << (relative_bit_idx + 1)) - 1; + debug_assert_ne!(r, 0); + r + } } #[allow(dead_code)] @@ -130,6 +140,38 @@ impl BranchMask { } } +#[cfg(all(feature = "std", test))] +mod tests { + use super::*; + use proptest::prelude::*; + + proptest! { + #![proptest_config(ProptestConfig::with_cases(1_000_000))] + #[test] + fn test_branch_mask(word_idx in 0u32..8, a: u32, b: u32) { + let mask = BranchMask::new(word_idx, a, b); + + match (mask.is_left_descendant(a), + mask.is_right_descendant(a), + mask.is_left_descendant(b), + mask.is_right_descendant(b)) { + (true, false, false, true) | (false, true, true, false) => (), + other => panic!("\n\ + mast.relative_bit_idx: {}\n\ + mask.left_prefix: {:032b}\n\ + a: {:032b}\n\ + b: {:032b}\n\ + (a.left, a.right, b.left, b.right): {:?}", + mask.relative_bit_idx(), + mask.left_prefix, + a, b, other), + + } + } + + } +} + #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct Branch { pub left: NR, @@ -213,7 +255,7 @@ impl Branch { } } -impl Branch> { +impl Branch> { #[allow(dead_code)] fn new_at_branch( word_idx: usize, @@ -232,7 +274,7 @@ impl Branch> { }; let diff = branch_word_or_prefix ^ leaf_word; - let discriminant_bit_idx = diff.leading_zeros(); + let discriminant_bit_idx = diff.trailing_zeros(); let mask = BranchMask { bit_idx: word_idx as u32 * 32 + discriminant_bit_idx, @@ -241,13 +283,19 @@ impl Branch> { debug_assert!(branch.mask.word_idx() >= word_idx); - debug_assert_eq!( - branch.prior_word, - leaf.key_hash.0[branch.mask.word_idx() - 1] - ); - let prefix = if word_idx == branch.mask.word_idx() { debug_assert_eq!(prior_word, branch.prior_word); + if branch.prior_word != prior_word { + dbg!( + word_idx, + branch.mask.word_idx(), + branch.prior_word, + prior_word + ); + dbg!(branch.prefix.len()); + dbg!(&branch.prefix); + dbg!(&leaf.key_hash.0); + } mem::take(&mut branch.prefix) } else if word_idx == branch.mask.word_idx() - 1 { mem::take(&mut branch.prefix) @@ -306,8 +354,8 @@ impl Branch> { new_leaf: Box>, ) -> Box { let Some((word_idx, (a, b))) = iter::zip(new_leaf.key_hash.0, old_leaf.as_ref().key_hash.0) - .skip(prefix_start_idx) .enumerate() + .skip(prefix_start_idx) .find(|(_, (a, b))| a != b) else { panic!("The keys are the same") @@ -315,16 +363,17 @@ impl Branch> { debug_assert!(new_leaf.key_hash.0[..word_idx] == old_leaf.as_ref().key_hash.0[..word_idx]); - let prefix = new_leaf.key_hash.0[prefix_start_idx..word_idx - 1].to_vec(); + let prior_word_idx = word_idx.saturating_sub(1); + let prefix = new_leaf.key_hash.0[prefix_start_idx..prior_word_idx].to_vec(); let prior_word = if word_idx == 0 { 0 } else { debug_assert_eq!( - new_leaf.key_hash.0[word_idx - 1], - old_leaf.as_ref().key_hash.0[word_idx - 1] + new_leaf.key_hash.0[prior_word_idx], + old_leaf.as_ref().key_hash.0[prior_word_idx] ); - new_leaf.key_hash.0[word_idx - 1] + new_leaf.key_hash.0[prior_word_idx] }; let mask = BranchMask::new(word_idx as u32, a, b); @@ -337,6 +386,29 @@ impl Branch> { (new_leaf.into(), old_leaf.into()) } else { + if !mask.is_right_descendant(a) { + eprintln!("mask.left_prefix : {:032b}", mask.left_prefix); + eprintln!("mask.bit_idx: {}", mask.bit_idx); + eprintln!("mask.right_prefix: {:032b}", mask.right_prefix()); + eprintln!("1: {:032b}", 1); + + eprintln!( + "mask.prefix_discriminant_mask: {:032b}", + mask.prefix_discriminant_mask() + ); + eprintln!( + "1 << (self.relative_bit_idx() + 1): {:032b}", + 1 << (mask.relative_bit_idx() + 1) + ); + eprintln!("self.relative_bit_idx: {}", mask.relative_bit_idx()); + eprintln!("a: {:032b}", a); + eprintln!("b: {:032b}", b); + eprintln!("word_idx: {}", word_idx); + eprintln!("prefix_start_idx: {}", prefix_start_idx); + eprintln!("old_leaf: {:?}", old_leaf.as_ref()); + eprintln!("new_leaf: {:?}", new_leaf); + } + debug_assert!(mask.is_right_descendant(a)); debug_assert!(!mask.is_left_descendant(a)); @@ -384,7 +456,7 @@ pub struct Transaction, V> { pub current_root: TrieRoot, } -impl, V: AsRef<[u8]>> Transaction { +impl, V: Debug + AsRef<[u8]>> Transaction { pub fn new(root: TrieRoot, data_store: S) -> Self { Transaction { current_root: root, @@ -666,7 +738,7 @@ impl, V: AsRef<[u8]>> Transaction { }; *next = NodeRef::ModBranch(Branch::new_from_leafs( - branch.mask.word_idx() - 1, + branch.mask.word_idx().saturating_sub(1), leaf, Box::new(Leaf { key_hash: *key_hash, diff --git a/src/stored.rs b/src/stored.rs index 8b90763..b9cbb03 100644 --- a/src/stored.rs +++ b/src/stored.rs @@ -56,6 +56,14 @@ pub struct MemoryDb { leaves: BTreeMap, Leaf>>, } +impl MemoryDb { + pub fn empty() -> Self { + Self { + leaves: BTreeMap::new(), + } + } +} + impl Database for MemoryDb { type Error = Error; diff --git a/src/stored/merkle.rs b/src/stored/merkle.rs index 69e09d0..0b3ac79 100644 --- a/src/stored/merkle.rs +++ b/src/stored/merkle.rs @@ -1,6 +1,5 @@ use alloc::{boxed::Box, string::String, vec::Vec}; use bumpalo::Bump; -use ouroboros::self_referencing; use crate::{Branch, Leaf}; @@ -65,69 +64,69 @@ impl> Store for Snapshot { } } -// Maybe just use Box with nightly Allocator parameter. -#[self_referencing] -pub struct SnapshotBuilder { +pub struct SnapshotBuilder<'a, Db, V> { db: Db, - bump: Bump, + bump: &'a Bump, - #[borrows(bump)] - #[covariant] - nodes: Vec<&'this NodeHashMaybeNode<'this, V>>, + nodes: Vec<&'a NodeHashMaybeNode<'a, V>>, } type NodeHashMaybeNode<'a, V> = (NodeHash, Option, &'a Leaf>>); -impl, V: 'static> Store for SnapshotBuilder { +impl<'a, Db: Database, V> Store for SnapshotBuilder<'a, Db, V> { type Error = Error; fn get_unvisted_hash(&self, hash_idx: Idx) -> Result<&NodeHash, Self::Error> { let hash_idx = hash_idx as usize; - self.with_nodes(|nodes| { - nodes - .get(hash_idx) - .map(|(hash, _)| hash) - .ok_or(Error::NodeNotFound) - }) + self.nodes + .get(hash_idx) + .map(|(hash, _)| hash) + .ok_or(Error::NodeNotFound) } fn get_node(&mut self, hash_idx: Idx) -> Result, &Leaf>, Self::Error> { let hash_idx = hash_idx as usize; - self.with_mut(|this| { - let Some((hash, o_node)) = this - .nodes - .get(hash_idx) - .map(|(hash, o_node)| (hash, *o_node)) - else { - return Err(Error::NodeNotFound); - }; - - if let Some(node) = o_node { - return Ok(node); - } + let Some((hash, o_node)) = self + .nodes + .get(hash_idx) + .map(|(hash, o_node)| (hash, *o_node)) + else { + return Err(Error::NodeNotFound); + }; - let next_idx = this.nodes.len() as Idx; - let (node, left, right) = Self::get_from_db(this.bump, this.db, hash, next_idx)?; + if let Some(node) = o_node { + return Ok(node); + } - let mut add_unvisited = |hash: Option| { - if let Some(hash) = hash { - this.nodes.push(this.bump.alloc((hash, None))) - } - }; + let next_idx = self.nodes.len() as Idx; + let (node, left, right) = Self::get_from_db(self.bump, &self.db, hash, next_idx)?; - add_unvisited(left); - add_unvisited(right); + let mut add_unvisited = |hash: Option| { + if let Some(hash) = hash { + self.nodes.push(self.bump.alloc((hash, None))) + } + }; - Ok(node) - }) + add_unvisited(left); + add_unvisited(right); + + Ok(node) } } -impl, V: 'static> SnapshotBuilder { +impl<'a, Db: Database, V> SnapshotBuilder<'a, Db, V> { + pub fn new_with_db(db: Db, bump: &'a Bump) -> Self { + Self { + db, + bump, + nodes: Vec::new(), + } + } + #[inline(always)] - fn get_from_db<'a>( + fn get_from_db( bump: &'a Bump, db: &Db, hash: &NodeHash, diff --git a/tests/modified_only.rs b/tests/modified_only.rs new file mode 100644 index 0000000..f21164a --- /dev/null +++ b/tests/modified_only.rs @@ -0,0 +1,72 @@ +use proptest::prelude::*; +use std::collections::HashMap; + +use sha2::{Digest, Sha256}; + +use kairos_trie::{ + stored::{merkle::SnapshotBuilder, MemoryDb}, + KeyHash, Transaction, TrieRoot, +}; + +fn sha256_hash(data: &[u8]) -> [u8; 32] { + let mut hasher = Sha256::new(); + hasher.update(data); + hasher.finalize().into() +} + +#[test] +fn insert_get_u64_round_trip() { + let hashmap: HashMap> = (0u64..10000) + .map(|i| { + ( + KeyHash::from(&sha256_hash(&i.to_le_bytes())), + i.to_le_bytes().to_vec(), + ) + }) + .collect(); + + let bump = bumpalo::Bump::new(); + let snapshot = SnapshotBuilder::new_with_db(MemoryDb::>::empty(), &bump); + + let mut txn = Transaction::new(TrieRoot::Empty, snapshot); + + for (key, value) in hashmap.iter() { + txn.insert(key, value.clone()).unwrap(); + let ret_val = txn.get(key).unwrap().unwrap(); + assert_eq!(ret_val, value); + } + + for (key, value) in hashmap.iter() { + let ret_val = txn.get(key).unwrap().unwrap(); + assert_eq!(ret_val, value); + } +} + +prop_compose! { + fn arb_key_hash()(data in any::<[u8; 32]>()) -> KeyHash { + KeyHash::from(&data) + } +} + +proptest! { + #[test] + fn prop_insert_get_rand( + keys in prop::collection::hash_map(arb_key_hash(), 0u64.., 0..100_000) + ) { + let bump = bumpalo::Bump::new(); + let snapshot = SnapshotBuilder::new_with_db(MemoryDb::<[u8; 8]>::empty(), &bump); + + let mut txn = Transaction::new(TrieRoot::Empty, snapshot); + + for (key, value) in keys.iter() { + txn.insert(key, value.to_le_bytes()).unwrap(); + let ret_val = txn.get(key).unwrap().unwrap(); + assert_eq!(ret_val, &value.to_le_bytes()); + } + + for (key, value) in keys.iter() { + let ret_val = txn.get(key).unwrap().unwrap(); + assert_eq!(ret_val, &value.to_le_bytes()); + } + } +}