From 87c69b3d9c8916bf304d9c0ac8b63668ef7b8584 Mon Sep 17 00:00:00 2001
From: Wisdom Ogwu
Date: Fri, 25 Aug 2023 08:37:19 +0100
Subject: [PATCH 1/7] wip

finalize multi chunk with limit
implement chunk op
test chunk op encoding
convert chunk op chunk id to string
implement traversal instruction to string
add chunking error + devoid multi subtree chunk from encoding work
multi-subtree-chunk skeleton + return encoding length in multichunk
make chunks fixed size height 2
test height proof
implement height proof verifier
update documentation
verify height proof generation
add documentation
test no of chunk under chunk id fn
implement number of chunks under chunk id function
extract chunk layer function from chunk height
separate number_of_chunk into height and layer_height functions
return multi chunk result
enforce limit without storage overhead
add test for encoding length check
implement iterator for chunk producer
remove cost from chunks
fix the error type
implement random chunk access
fixes
implement chunk height function
add traverse then build chunk function to ref walker
update comment
implement chunk producer length
init chunk producer struct
implement merk tree height function
update traversal generation instruction
add instruction traversal test
fix documentation
implement binary range function
clean up number of chunks function
given a subtree of a given height return the exit node count
documentation fixes
implement chunk_height_per_layer
verify that chunks produce expected root hash
implement and test variable depth chunk creation
restart chunking v2
---
 merk/src/error.rs                     |  20 +-
 merk/src/merk/chunks.rs               |  26 +-
 merk/src/merk/chunks2.rs              | 943 ++++++++++++++++++++++++++
 merk/src/merk/mod.rs                  |  54 ++
 merk/src/merk/restore.rs              |   6 +-
 merk/src/merk/restore2.rs             | 195 ++++++
 merk/src/proofs/chunk.rs              |  26 +-
 merk/src/proofs/chunk/binary_range.rs | 211 ++++++
 merk/src/proofs/chunk/chunk2.rs       | 620 +++++++++++++++++
 merk/src/proofs/chunk/chunk_op.rs     | 141 ++++
 merk/src/proofs/chunk/error.rs        |  32 +
 merk/src/proofs/chunk/util.rs         | 432 ++++++++++++
 merk/src/test_utils/mod.rs            |  12 +-
 13 files changed, 2692 insertions(+), 26 deletions(-)
 create mode 100644 merk/src/merk/chunks2.rs
 create mode 100644 merk/src/merk/restore2.rs
 create mode 100644 merk/src/proofs/chunk/binary_range.rs
 create mode 100644 merk/src/proofs/chunk/chunk2.rs
 create mode 100644 merk/src/proofs/chunk/chunk_op.rs
 create mode 100644 merk/src/proofs/chunk/error.rs
 create mode 100644 merk/src/proofs/chunk/util.rs

diff --git a/merk/src/error.rs b/merk/src/error.rs
index 4455ef96..96717391 100644
--- a/merk/src/error.rs
+++ b/merk/src/error.rs
@@ -28,6 +28,8 @@
 //! 
Errors +use crate::proofs::chunk::error::ChunkError; + #[cfg(any(feature = "full", feature = "verify"))] #[derive(Debug, thiserror::Error)] /// Errors @@ -59,11 +61,21 @@ pub enum Error { /// Chunking error #[error("chunking error {0}")] - ChunkingError(&'static str), + ChunkingError(ChunkError), + + // TODO: remove + /// Old chunking error + #[error("chunking error {0}")] + OldChunkingError(&'static str), /// Chunk restoring error #[error("chunk restoring error {0}")] - ChunkRestoringError(String), + ChunkRestoringError(ChunkError), + + // TODO: remove + /// Chunk restoring error + #[error("chunk restoring error {0}")] + OldChunkRestoringError(String), /// Key not found error #[error("key not found error {0}")] @@ -97,6 +109,10 @@ pub enum Error { #[error("invalid operation error {0}")] InvalidOperation(&'static str), + /// Internal error + #[error("internal error {0}")] + InternalError(&'static str), + /// Specialized costs error #[error("specialized costs error {0}")] SpecializedCostsError(&'static str), diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 7e8c588e..0df9655a 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -96,7 +96,7 @@ where /// calling `producer.len()`. pub fn chunk(&mut self, index: usize) -> Result, Error> { if index >= self.len() { - return Err(Error::ChunkingError("Chunk index out-of-bounds")); + return Err(Error::OldChunkingError("Chunk index out-of-bounds")); } self.index = index; @@ -129,7 +129,7 @@ where fn next_chunk(&mut self) -> Result, Error> { if self.index == 0 { if self.trunk.is_empty() { - return Err(Error::ChunkingError( + return Err(Error::OldChunkingError( "Attempted to fetch chunk on empty tree", )); } @@ -198,7 +198,7 @@ where { /// Creates a `ChunkProducer` which can return chunk proofs for replicating /// the entire Merk tree. 
- pub fn chunks(&self) -> Result, Error> { + pub fn chunks_old(&self) -> Result, Error> { ChunkProducer::new(self) } } @@ -223,7 +223,7 @@ mod tests { merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); merk.commit(); - let chunks = merk.chunks().unwrap(); + let chunks = merk.chunks_old().unwrap(); assert_eq!(chunks.len(), 1); assert_eq!(chunks.into_iter().size_hint().0, 1); } @@ -235,7 +235,7 @@ mod tests { merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); merk.commit(); - let chunks = merk.chunks().unwrap(); + let chunks = merk.chunks_old().unwrap(); assert_eq!(chunks.len(), 129); assert_eq!(chunks.into_iter().size_hint().0, 129); } @@ -247,7 +247,7 @@ mod tests { merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); merk.commit(); - let mut chunks = merk.chunks().unwrap().into_iter().map(|x| x.unwrap()); + let mut chunks = merk.chunks_old().unwrap().into_iter().map(|x| x.unwrap()); let chunk = chunks.next().unwrap(); let (trunk, height) = verify_trunk(chunk.into_iter().map(Ok)).unwrap().unwrap(); @@ -297,7 +297,7 @@ mod tests { .unwrap() .unwrap(); - merk.chunks() + merk.chunks_old() .unwrap() .into_iter() .map(|x| x.unwrap()) @@ -314,7 +314,7 @@ mod tests { ) .unwrap() .unwrap(); - let reopen_chunks = merk.chunks().unwrap().into_iter().map(|x| x.unwrap()); + let reopen_chunks = merk.chunks_old().unwrap().into_iter().map(|x| x.unwrap()); for (original, checkpoint) in original_chunks.zip(reopen_chunks) { assert_eq!(original.len(), checkpoint.len()); @@ -352,13 +352,13 @@ mod tests { merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); let chunks = merk - .chunks() + .chunks_old() .unwrap() .into_iter() .map(|x| x.unwrap()) .collect::>(); - let mut producer = merk.chunks().unwrap(); + let mut producer = merk.chunks_old().unwrap(); for i in 0..chunks.len() * 2 { let index = i % chunks.len(); assert_eq!(producer.chunk(index).unwrap(), chunks[index]); @@ -371,7 +371,7 @@ mod tests { let merk = TempMerk::new(); let _chunks = merk - .chunks() + .chunks_old() .unwrap() .into_iter() .map(|x| x.unwrap()) @@ -385,7 +385,7 @@ mod tests { let batch = make_batch_seq(1..42); merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - let mut producer = merk.chunks().unwrap(); + let mut producer = merk.chunks_old().unwrap(); let _chunk = producer.chunk(50000).unwrap(); } @@ -493,7 +493,7 @@ mod tests { let batch = make_batch_seq(1..42); merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - let mut producer = merk.chunks().unwrap(); + let mut producer = merk.chunks_old().unwrap(); let _chunk1 = producer.next_chunk(); let _chunk2 = producer.next_chunk(); } diff --git a/merk/src/merk/chunks2.rs b/merk/src/merk/chunks2.rs new file mode 100644 index 00000000..d455dbde --- /dev/null +++ b/merk/src/merk/chunks2.rs @@ -0,0 +1,943 @@ +// TODO: add MIT License +// TODO: add module description +// TODO: figure out verification features + +use std::{ + cmp::max, + collections::{LinkedList, VecDeque}, + path::Iter, +}; + +use ed::Encode; +use grovedb_costs::{CostResult, CostsExt, OperationCost}; +use grovedb_storage::StorageContext; +use integer_encoding::VarInt; + +use crate::{ + error::Error, + proofs::{ + chunk::{ + chunk_op::ChunkOp, + error::{ChunkError, ChunkError::InternalError}, + util::{ + chunk_height, generate_traversal_instruction, number_of_chunks, + traversal_instruction_as_string, write_to_vec, + }, + }, + Node, Op, + }, + tree::RefWalker, + Error::ChunkingError, + Merk, PanicSource, +}; + +// TODO: move types to some other file +// TODO: add 
documentation
+#[derive(Debug)]
+pub struct SubtreeChunk {
+    chunk: Vec<Op>,
+    next_index: Option<usize>,
+    remaining_limit: Option<usize>,
+}
+
+impl SubtreeChunk {
+    pub fn new(chunk: Vec<Op>, next_index: Option<usize>, remaining_limit: Option<usize>) -> Self {
+        Self {
+            chunk,
+            next_index,
+            remaining_limit,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct MultiChunk {
+    pub chunk: Vec<ChunkOp>,
+    pub next_index: Option<usize>,
+    pub remaining_limit: Option<usize>,
+}
+
+impl MultiChunk {
+    pub fn new(
+        chunk: Vec<ChunkOp>,
+        next_index: Option<usize>,
+        remaining_limit: Option<usize>,
+    ) -> Self {
+        Self {
+            chunk,
+            next_index,
+            remaining_limit,
+        }
+    }
+}
+
+/// A `ChunkProducer` allows the creation of chunk proofs, used for trustlessly
+/// replicating entire Merk trees. Chunks can be generated on the fly in a
+/// random order, or iterated in order for slightly better performance.
+pub struct ChunkProducer<'db, S> {
+    /// Represents the max height of the Merk tree
+    height: usize,
+    /// Represents the index of the next chunk
+    index: usize,
+    merk: &'db Merk<S>,
+}
+
+impl<'db, S> ChunkProducer<'db, S>
+where
+    S: StorageContext<'db>,
+{
+    /// Creates a new `ChunkProducer` for the given `Merk` instance
+    pub(crate) fn new(merk: &'db Merk<S>) -> Result<Self, Error> {
+        let tree_height = merk
+            .height()
+            .ok_or(Error::ChunkingError(ChunkError::EmptyTree(
+                "cannot create chunk producer for empty Merk",
+            )))?;
+        Ok(Self {
+            height: tree_height as usize,
+            index: 1,
+            merk,
+        })
+    }
+
+    /// Gets the chunk with the given index. Errors if the index is out of
+    /// bounds or the tree is empty - the number of chunks can be checked by
+    /// calling `producer.len()`.
+    pub fn chunk(&mut self, index: usize) -> Result<Vec<Op>, Error> {
+        // ensure that the chunk index is within bounds
+        let max_chunk_index = self.len();
+        if index < 1 || index > max_chunk_index {
+            return Err(ChunkingError(ChunkError::OutOfBounds(
+                "chunk index out of bounds",
+            )));
+        }
+
+        self.index = index + 1;
+
+        let traversal_instructions = generate_traversal_instruction(self.height, index)?;
+
+        let chunk_height = chunk_height(self.height, index).unwrap();
+
+        self.merk.walk(|maybe_walker| match maybe_walker {
+            Some(mut walker) => {
+                walker.traverse_and_build_chunk(&traversal_instructions, chunk_height)
+            }
+            None => Err(Error::ChunkingError(ChunkError::EmptyTree(
+                "cannot build chunk for empty Merk",
+            ))),
+        })
+    }
+
+    // TODO: add documentation
+    pub fn multi_chunk_with_limit(
+        &mut self,
+        index: usize,
+        limit: Option<usize>,
+    ) -> Result<MultiChunk, Error> {
+        // TODO: what happens if the vec is filled?
+        // we need some kind of ad hoc limit value if none is supplied;
+        // maybe we can just do something with the length to fix this?
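+        // For reference, the resulting multi chunk is a flat sequence of
+        // alternating op pairs:
+        //   [ChunkId(traversal_instruction), Chunk(ops), ChunkId(..), Chunk(..), ..]
+        // e.g. starting from chunk index 2 on a height-4 tree with no limit,
+        // chunks 2..=5 come back as four such pairs
+        // (see test_multi_chunk_with_no_limit_not_trunk below).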
+        let mut chunk = vec![];
+
+        let mut current_index = Some(index);
+        let mut current_limit = limit;
+
+        // generate as many subtree chunks as we can
+        // until we have exhausted all or hit a limit restriction
+        while current_index.is_some() {
+            let current_index_traversal_instruction = generate_traversal_instruction(
+                self.height,
+                current_index.expect("confirmed is Some"),
+            )?;
+            let chunk_id_op = ChunkOp::ChunkId(current_index_traversal_instruction);
+
+            // factor in the ChunkId encoding length in limit calculations
+            let temp_limit = if let Some(limit) = current_limit {
+                let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|_| {
+                    Error::ChunkingError(ChunkError::InternalError("cannot get encoding length"))
+                })?;
+                if limit >= chunk_id_op_encoding_len {
+                    Some(limit - chunk_id_op_encoding_len)
+                } else {
+                    Some(0)
+                }
+            } else {
+                None
+            };
+
+            let subtree_multi_chunk_result = self.subtree_multi_chunk_with_limit(
+                current_index.expect("confirmed is Some"),
+                temp_limit,
+            );
+
+            let limit_too_small_error = matches!(
+                subtree_multi_chunk_result,
+                Err(ChunkingError(ChunkError::LimitTooSmall(..)))
+            );
+
+            if limit_too_small_error {
+                if chunk.is_empty() {
+                    // no progress was made, return the limit too small error
+                    return Err(Error::ChunkingError(ChunkError::LimitTooSmall(
+                        "limit too small for initial chunk",
+                    )));
+                } else {
+                    // made progress, send the accumulated chunk
+                    break;
+                }
+            }
+
+            let subtree_multi_chunk = subtree_multi_chunk_result?;
+
+            chunk.push(chunk_id_op);
+            chunk.push(ChunkOp::Chunk(subtree_multi_chunk.chunk));
+
+            // update loop parameters
+            current_index = subtree_multi_chunk.next_index;
+            current_limit = subtree_multi_chunk.remaining_limit;
+        }
+
+        Ok(MultiChunk::new(chunk, current_index, current_limit))
+    }
+
+    /// Packs as many chunks as it can from a starting chunk index into a
+    /// vector. Stops when all chunks have been exhausted or the limit has
+    /// been reached.
+    pub fn subtree_multi_chunk_with_limit(
+        &mut self,
+        index: usize,
+        limit: Option<usize>,
+    ) -> Result<SubtreeChunk, Error> {
+        let max_chunk_index = number_of_chunks(self.height);
+        let mut chunk_index = index;
+
+        // we first get the chunk at the given index
+        let chunk_ops = self.chunk(chunk_index)?;
+        let mut chunk_byte_length = chunk_ops.encoding_length().map_err(|_| {
+            Error::ChunkingError(ChunkError::InternalError("can't get encoding length"))
+        })?;
+        chunk_index += 1;
+
+        let mut chunk = VecDeque::from(chunk_ops);
+
+        // ensure the limit is not less than the first chunk's byte length;
+        // if it is, we can't make progress, so return an error
+        if let Some(limit) = limit {
+            if chunk_byte_length > limit {
+                return Err(Error::ChunkingError(ChunkError::LimitTooSmall(
+                    "limit too small for initial chunk",
+                )));
+            }
+        }
+
+        let mut iteration_index = 0;
+        while iteration_index < chunk.len() {
+            // we only perform replacements on Hash nodes
+            if matches!(chunk[iteration_index], Op::Push(Node::Hash(..))) {
+                let replacement_chunk = self.chunk(chunk_index)?;
+
+                // calculate the new total
+                let new_total = replacement_chunk.encoding_length().map_err(|_| {
+                    Error::ChunkingError(ChunkError::InternalError("can't get encoding length"))
+                })?
+                    + chunk_byte_length
+                    - chunk[iteration_index].encoding_length().map_err(|_| {
+                        Error::ChunkingError(ChunkError::InternalError("can't get encoding length"))
+                    })?;
+
+                // verify that this chunk doesn't make us exceed the limit
+                if let Some(limit) = limit {
+                    if new_total > limit {
+                        let next_index = if chunk_index > max_chunk_index {
+                            None
+                        } else {
+                            Some(chunk_index)
+                        };
+
+                        return Ok(SubtreeChunk::new(
+                            chunk.into(),
+                            next_index,
+                            Some(limit - chunk_byte_length),
+                        ));
+                    }
+                }
+
+                chunk_byte_length = new_total;
+                chunk_index += 1;
+
+                chunk.remove(iteration_index);
+                for op in replacement_chunk.into_iter().rev() {
+                    chunk.insert(iteration_index, op);
+                }
+            } else {
+                iteration_index += 1;
+            }
+        }
+
+        let remaining_limit = limit.map(|l| l - chunk_byte_length);
+        let next_index = if chunk_index > max_chunk_index {
+            None
+        } else {
+            Some(chunk_index)
+        };
+
+        Ok(SubtreeChunk::new(chunk.into(), next_index, remaining_limit))
+    }
+
+    /// Returns the total number of chunks for the underlying Merk tree.
+    pub fn len(&self) -> usize {
+        number_of_chunks(self.height)
+    }
+
+    /// Gets the next chunk based on the `ChunkProducer`'s internal index
+    /// state. Used by the `Iterator` implementation to yield the chunks in
+    /// order.
+    fn next_chunk(&mut self) -> Option<Result<Vec<Op>, Error>> {
+        // for now not better than random access
+        // TODO: fix
+        let max_index = number_of_chunks(self.height);
+        if self.index > max_index {
+            return None;
+        }
+
+        Some(self.chunk(self.index))
+    }
+
+    // TODO: test this logic out
+    fn get_chunk_encoding_length(chunk: &[Op]) -> usize {
+        // TODO: deal with error
+        chunk
+            .iter()
+            .fold(0, |sum, op| sum + op.encoding_length().unwrap())
+    }
+}
+
+/// Iterate over each chunk, returning `None` after the last chunk
+impl<'db, S> Iterator for ChunkProducer<'db, S>
+where
+    S: StorageContext<'db>,
+{
+    type Item = Result<Vec<Op>, Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.next_chunk()
+    }
+}
+
+impl<'db, S> Merk<S>
+where
+    S: StorageContext<'db>,
+{
+    /// Creates a `ChunkProducer` which can return chunk proofs for replicating
+    /// the entire Merk tree.
+    pub fn chunks(&'db self) -> Result<ChunkProducer<'db, S>, Error> {
+        ChunkProducer::new(self)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::{
+        proofs::{
+            chunk::chunk2::{
+                tests::{traverse_get_kv_feature_type, traverse_get_node_hash},
+                LEFT, RIGHT,
+            },
+            tree::execute,
+            Tree,
+        },
+        test_utils::{make_batch_seq, TempMerk},
+    };
+
+    #[derive(Default)]
+    struct NodeCounts {
+        hash: usize,
+        kv_hash: usize,
+        kv: usize,
+        kv_value_hash: usize,
+        kv_digest: usize,
+        kv_ref_value_hash: usize,
+        kv_value_hash_feature_type: usize,
+    }
+
+    impl NodeCounts {
+        fn sum(&self) -> usize {
+            self.hash
+                + self.kv_hash
+                + self.kv
+                + self.kv_value_hash
+                + self.kv_digest
+                + self.kv_ref_value_hash
+                + self.kv_value_hash_feature_type
+        }
+    }
+
+    fn count_node_types(tree: Tree) -> NodeCounts {
+        let mut counts = NodeCounts::default();
+
+        tree.visit_nodes(&mut |node| {
+            match node {
+                Node::Hash(_) => counts.hash += 1,
+                Node::KVHash(_) => counts.kv_hash += 1,
+                Node::KV(..) => counts.kv += 1,
+                Node::KVValueHash(..) => counts.kv_value_hash += 1,
+                Node::KVDigest(..) => counts.kv_digest += 1,
+                Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1,
+                Node::KVValueHashFeatureType(..) => counts.kv_value_hash_feature_type += 1,
+            };
+        });
+
+        counts
+    }
+
+    #[test]
+    fn test_merk_chunk_len() {
+        // Tree of height 5 - max of 31 elements, min of 16 elements
+        // 5 will be broken into 3 layers = [2, 2, 1]
+        // exit nodes from the first layer = 2^2 = 4
+        // exit nodes from the second layer = 4 * 2^2 = 16
+        // total_chunk = 1 + 4 + 16 = 21 chunks
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..20);
+        merk.apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+        assert_eq!(merk.height(), Some(5));
+        let chunk_producer = ChunkProducer::new(&merk).unwrap();
+        assert_eq!(chunk_producer.len(), 21);
+
+        // Tree of height 10 - max of 1023 elements, min of 512 elements
+        // 5 layers -> [2, 2, 2, 2, 2]
+        // chunk_count_per_layer -> [1, 4, 16, 64, 256]
+        // total = 341 chunks
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..1000);
+        merk.apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+        assert_eq!(merk.height(), Some(10));
+        let chunk_producer = ChunkProducer::new(&merk).unwrap();
+        assert_eq!(chunk_producer.len(), 341);
+    }
+
+    #[test]
+    fn test_chunk_producer_iter() {
+        // tree with height 4
+        // full tree
+        //                   7
+        //                /     \
+        //              3         11
+        //            /   \      /   \
+        //           1     5    9     13
+        //          / \   / \  / \   /  \
+        //         0   2 4   6 8  10 12  14
+        // going to be broken into [2, 2]
+        // that's a total of 5 chunks
+
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..15);
+        merk.apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+        assert_eq!(merk.height(), Some(4));
+
+        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
+
+        // build iterator from first chunk producer
+        let mut chunks = merk.chunks().expect("should return producer");
+
+        // ensure that the chunks from the iterator are the same
+        // as those from the chunk producer
+        for i in 1..=5 {
+            assert_eq!(
+                chunks.next().unwrap().unwrap(),
+                chunk_producer.chunk(i).unwrap()
+            );
+        }
+
+        // returns None after the last chunk
+        assert!(chunks.next().is_none());
+    }
+
+    #[test]
+    fn test_random_chunk_access() {
+        // tree with height 4
+        // full tree
+        //                   7
+        //                /     \
+        //              3         11
+        //            /   \      /   \
+        //           1     5    9     13
+        //          / \   / \  / \   /  \
+        //         0   2 4   6 8  10 12  14
+        // going to be broken into [2, 2]
+        // that's a total of 5 chunks
+
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..15);
+        merk.apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+        assert_eq!(merk.height(), Some(4));
+
+        let mut inner_tree = merk.tree.take().expect("has inner tree");
+        merk.tree.set(Some(inner_tree.clone()));
+
+        // TODO: should I be using panic source?
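+        // Note: PanicSource only panics if a node has to be fetched from
+        // storage; the full tree was loaded above, so it is safe here.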
+        let mut tree_walker = RefWalker::new(&mut inner_tree, PanicSource {});
+
+        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
+        assert_eq!(chunk_producer.len(), 5);
+
+        // assert bounds
+        assert!(chunk_producer.chunk(0).is_err());
+        assert!(chunk_producer.chunk(6).is_err());
+
+        // first chunk
+        // expected:
+        //                   7
+        //                /     \
+        //              3         11
+        //            /   \      /   \
+        //          H(1)  H(5) H(9)  H(13)
+        let chunk = chunk_producer.chunk(1).expect("should generate chunk");
+        assert_eq!(chunk.len(), 13);
+        assert_eq!(
+            chunk,
+            vec![
+                Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])),
+                Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])),
+                Op::Parent,
+                Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])),
+                Op::Child,
+                Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])),
+                Op::Parent,
+                Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])),
+                Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])),
+                Op::Parent,
+                Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])),
+                Op::Child,
+                Op::Child
+            ]
+        );
+
+        // second chunk
+        // expected:
+        //   1
+        //  / \
+        // 0   2
+        let chunk = chunk_producer.chunk(2).expect("should generate chunk");
+        assert_eq!(chunk.len(), 5);
+        assert_eq!(
+            chunk,
+            vec![
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[LEFT, LEFT, LEFT]
+                )),
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[LEFT, LEFT]
+                )),
+                Op::Parent,
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[LEFT, LEFT, RIGHT]
+                )),
+                Op::Child
+            ]
+        );
+
+        // third chunk
+        // expected:
+        //   5
+        //  / \
+        // 4   6
+        let chunk = chunk_producer.chunk(3).expect("should generate chunk");
+        assert_eq!(chunk.len(), 5);
+        assert_eq!(
+            chunk,
+            vec![
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[LEFT, RIGHT, LEFT]
+                )),
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[LEFT, RIGHT]
+                )),
+                Op::Parent,
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[LEFT, RIGHT, RIGHT]
+                )),
+                Op::Child
+            ]
+        );
+
+        // fourth chunk
+        // expected:
+        //   9
+        //  / \
+        // 8   10
+        let chunk = chunk_producer.chunk(4).expect("should generate chunk");
+        assert_eq!(chunk.len(), 5);
+        assert_eq!(
+            chunk,
+            vec![
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[RIGHT, LEFT, LEFT]
+                )),
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[RIGHT, LEFT]
+                )),
+                Op::Parent,
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[RIGHT, LEFT, RIGHT]
+                )),
+                Op::Child
+            ]
+        );
+
+        // fifth chunk
+        // expected:
+        //   13
+        //  / \
+        // 12   14
+        let chunk = chunk_producer.chunk(5).expect("should generate chunk");
+        assert_eq!(chunk.len(), 5);
+        assert_eq!(
+            chunk,
+            vec![
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[RIGHT, RIGHT, LEFT]
+                )),
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[RIGHT, RIGHT]
+                )),
+                Op::Parent,
+                Op::Push(traverse_get_kv_feature_type(
+                    &mut tree_walker,
+                    &[RIGHT, RIGHT, RIGHT]
+                )),
+                Op::Child
+            ]
+        );
+    }
+
+    #[test]
+    fn test_subtree_chunk_no_limit() {
+        // tree of height 4
+        // 5 chunks
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..15);
+        merk.apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+        assert_eq!(merk.height(), Some(4));
+
+        // generate multi chunk with no limit
+        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
+        let chunk_result = chunk_producer
+            .subtree_multi_chunk_with_limit(1, None)
+            .expect("should generate chunk with limit");
+
+        assert_eq!(chunk_result.remaining_limit, None);
+        assert_eq!(chunk_result.next_index, None);
+
+        let tree = execute(chunk_result.chunk.into_iter().map(Ok), false, |_| Ok(()))
+            .unwrap()
+            .expect("should reconstruct tree");
+        assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap());
+
+        // assert that all nodes are of type kv_value_hash_feature_type
+        let node_counts = count_node_types(tree);
+        assert_eq!(node_counts.hash, 0);
+        assert_eq!(node_counts.kv_hash, 0);
+        assert_eq!(node_counts.kv, 0);
+        assert_eq!(node_counts.kv_value_hash, 0);
+        assert_eq!(node_counts.kv_digest, 0);
+        assert_eq!(node_counts.kv_ref_value_hash, 0);
+        assert_eq!(node_counts.kv_value_hash_feature_type, 15);
+    }
+
+    #[test]
+    fn test_subtree_chunk_with_limit() {
+        // tree of height 4
+        // 5 chunks
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..15);
+        merk.apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+        assert_eq!(merk.height(), Some(4));
+
+        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
+
+        // the initial chunk has size 453, so a limit of 10 is too small
+        // and should return an error
+        let chunk = chunk_producer.subtree_multi_chunk_with_limit(1, Some(10));
+        assert!(chunk.is_err());
+
+        // get just the first chunk
+        let chunk_result = chunk_producer
+            .subtree_multi_chunk_with_limit(1, Some(453))
+            .expect("should generate chunk with limit");
+        assert_eq!(chunk_result.remaining_limit, Some(0));
+        assert_eq!(chunk_result.next_index, Some(2));
+
+        let chunk = chunk_result.chunk;
+        assert_eq!(chunk.encoding_length().unwrap(), 453);
+        assert_eq!(chunk.len(), 13); // op count
+        let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(()))
+            .unwrap()
+            .expect("should reconstruct tree");
+        assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap());
+
+        let node_counts = count_node_types(tree);
+        assert_eq!(node_counts.kv_value_hash_feature_type, 3);
+        assert_eq!(node_counts.hash, 4);
+        assert_eq!(node_counts.sum(), 4 + 3);
+
+        // get up to the second chunk
+        let chunk_result = chunk_producer
+            .subtree_multi_chunk_with_limit(1, Some(737))
+            .expect("should generate chunk with limit");
+        assert_eq!(chunk_result.remaining_limit, Some(0));
+        assert_eq!(chunk_result.next_index, Some(3));
+
+        let chunk = chunk_result.chunk;
+        assert_eq!(chunk.encoding_length().unwrap(), 737);
+        assert_eq!(chunk.len(), 17); // op count
+        let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(()))
+            .unwrap()
+            .expect("should reconstruct tree");
+        assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap());
+
+        let node_counts = count_node_types(tree);
+        assert_eq!(node_counts.kv_value_hash_feature_type, 6);
+        assert_eq!(node_counts.hash, 3);
+        assert_eq!(node_counts.sum(), 6 + 3);
+
+        // get up to the third chunk
+        let chunk_result = chunk_producer
+            .subtree_multi_chunk_with_limit(1, Some(1021))
+            .expect("should generate chunk with limit");
+        assert_eq!(chunk_result.remaining_limit, Some(0));
+        assert_eq!(chunk_result.next_index, Some(4));
+
+        let chunk = chunk_result.chunk;
+        assert_eq!(chunk.encoding_length().unwrap(), 1021);
+        assert_eq!(chunk.len(), 21); // op count
+        let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(()))
+            .unwrap()
+            .expect("should reconstruct tree");
+        assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap());
+
+        let node_counts = count_node_types(tree);
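+        // pattern: each additional chunk inlines 3 more KV nodes and
+        // resolves one Hash placeholder (3/4 -> 6/3 -> 9/2 -> ...)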
+        assert_eq!(node_counts.kv_value_hash_feature_type, 9);
+        assert_eq!(node_counts.hash, 2);
+        assert_eq!(node_counts.sum(), 9 + 2);
+
+        // get up to the fourth chunk
+        let chunk_result = chunk_producer
+            .subtree_multi_chunk_with_limit(1, Some(1305))
+            .expect("should generate chunk with limit");
+        assert_eq!(chunk_result.remaining_limit, Some(0));
+        assert_eq!(chunk_result.next_index, Some(5));
+
+        let chunk = chunk_result.chunk;
+        assert_eq!(chunk.encoding_length().unwrap(), 1305);
+        assert_eq!(chunk.len(), 25); // op count
+        let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(()))
+            .unwrap()
+            .expect("should reconstruct tree");
+        assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap());
+
+        let node_counts = count_node_types(tree);
+        assert_eq!(node_counts.kv_value_hash_feature_type, 12);
+        assert_eq!(node_counts.hash, 1);
+        assert_eq!(node_counts.sum(), 12 + 1);
+
+        // get up to the fifth chunk
+        let chunk_result = chunk_producer
+            .subtree_multi_chunk_with_limit(1, Some(1589))
+            .expect("should generate chunk with limit");
+        assert_eq!(chunk_result.remaining_limit, Some(0));
+        assert_eq!(chunk_result.next_index, None);
+
+        let chunk = chunk_result.chunk;
+        assert_eq!(chunk.encoding_length().unwrap(), 1589);
+        assert_eq!(chunk.len(), 29); // op count
+        let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(()))
+            .unwrap()
+            .expect("should reconstruct tree");
+        assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap());
+
+        let node_counts = count_node_types(tree);
+        assert_eq!(node_counts.kv_value_hash_feature_type, 15);
+        assert_eq!(node_counts.hash, 0);
+        assert_eq!(node_counts.sum(), 15);
+
+        // limit larger than the total chunk
+        let chunk_result = chunk_producer
+            .subtree_multi_chunk_with_limit(1, Some(usize::MAX))
+            .expect("should generate chunk with limit");
+        assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026));
+        assert_eq!(chunk_result.next_index, None);
+
+        let chunk = chunk_result.chunk;
+        assert_eq!(chunk.encoding_length().unwrap(), 1589);
+        assert_eq!(chunk.len(), 29); // op count
+        let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(()))
+            .unwrap()
+            .expect("should reconstruct tree");
+        assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap());
+
+        let node_counts = count_node_types(tree);
+        assert_eq!(node_counts.kv_value_hash_feature_type, 15);
+        assert_eq!(node_counts.hash, 0);
+        assert_eq!(node_counts.sum(), 15);
+    }
+
+    #[test]
+    fn test_multi_chunk_with_no_limit_trunk() {
+        // tree of height 4
+        // 5 chunks
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..15);
+        merk.apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+        assert_eq!(merk.height(), Some(4));
+
+        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
+
+        // we generate the multi chunk starting from index 1 (the trunk);
+        // all hash nodes are replaced inline, so the whole tree fits in a
+        // single chunk
+        let chunk_result = chunk_producer
+            .multi_chunk_with_limit(1, None)
+            .expect("should generate chunk with limit");
+
+        assert_eq!(chunk_result.remaining_limit, None);
+        assert_eq!(chunk_result.next_index, None);
+
+        // should only contain 2 items, the starting chunk id and the entire tree
+        assert_eq!(chunk_result.chunk.len(), 2);
+
+        // assert items
+        assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![]));
+        if let ChunkOp::Chunk(chunk) = &chunk_result.chunk[1] {
+            let tree = execute(chunk.clone().into_iter().map(Ok), false, |_| Ok(()))
+                .unwrap()
+                .expect("should reconstruct tree");
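+            // the reconstructed chunk must reproduce the source tree's root hash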
assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + } else { + panic!("expected ChunkOp::Chunk"); + } + } + + #[test] + fn test_multi_chunk_with_no_limit_not_trunk() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk starting from index 2, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit(2, None) + .expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // chunk 2 - 5 will be considered separate subtrees + // each will have an accompanying chunk id, so 8 elements total + assert_eq!(chunk_result.chunk.len(), 8); + + // assert the chunk id's + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); + assert_eq!(chunk_result.chunk[4], ChunkOp::ChunkId(vec![RIGHT, LEFT])); + assert_eq!(chunk_result.chunk[6], ChunkOp::ChunkId(vec![RIGHT, RIGHT])); + + // assert the chunks + assert_eq!( + chunk_result.chunk[1], + ChunkOp::Chunk(chunk_producer.chunk(2).expect("should generate chunk")) + ); + assert_eq!( + chunk_result.chunk[3], + ChunkOp::Chunk(chunk_producer.chunk(3).expect("should generate chunk")) + ); + assert_eq!( + chunk_result.chunk[5], + ChunkOp::Chunk(chunk_producer.chunk(4).expect("should generate chunk")) + ); + assert_eq!( + chunk_result.chunk[7], + ChunkOp::Chunk(chunk_producer.chunk(5).expect("should generate chunk")) + ); + } + + #[test] + fn test_multi_chunk_with_limit() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // ensure that the remaining limit, next index and values given are correct + // if limit is smaller than first chunk, we should get an error + let chunk_result = chunk_producer.multi_chunk_with_limit(1, Some(5)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); + + // get chunk 2 + // data size of chunk 2 is exactly 317 + // chunk op encoding for chunk 2 = 321 + // hence limit of 317 will be insufficient + let chunk_result = chunk_producer.multi_chunk_with_limit(2, Some(317)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); + + // get chunk 2 and 3 + // chunk 2 chunk op = 331 + // chunk 3 chunk op = 321 + let chunk_result = chunk_producer + .multi_chunk_with_limit(2, Some(321 + 321 + 5)) + .expect("should generate chunk"); + assert_eq!(chunk_result.next_index, Some(4)); + assert_eq!(chunk_result.remaining_limit, Some(5)); + assert_eq!(chunk_result.chunk.len(), 4); + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); + } +} diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 37276b65..0eb3f8cd 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -34,7 +34,9 @@ pub(crate) mod defaults; pub mod options; +mod 
chunks2; pub mod restore; +mod restore2; use std::{ cell::Cell, @@ -607,6 +609,23 @@ where }) } + /// Returns the height of the Merk tree + pub fn height(&self) -> Option { + self.use_tree(|tree| match tree { + None => None, + Some(tree) => Some(tree.height()), + }) + } + + // TODO: remove this + // /// Returns a clone of the Tree instance in Merk + // pub fn get_root_tree(&self) -> Option { + // self.use_tree(|tree| match tree { + // None => None, + // Some(tree) => Some(tree.clone()), + // }) + // } + /// Returns the root non-prefixed key of the tree. If the tree is empty, /// None. pub fn root_key(&self) -> Option> { @@ -1563,6 +1582,41 @@ mod test { ); } + #[test] + fn tree_height() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..1); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(1)); + + // height 2 + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..2); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(2)); + + // height 5 + // 2^5 - 1 = 31 (max number of elements in tree of height 5) + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..31); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + + // should still be height 5 for 29 elements + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..29); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + } + #[test] fn insert_uncached() { let batch_size = 20; diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 23cef703..e1a1afd4 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -106,7 +106,7 @@ impl<'db, S: StorageContext<'db>> Restorer { /// to 0). pub fn finalize(mut self) -> Result, Error> { if self.remaining_chunks().unwrap_or(0) != 0 { - return Err(Error::ChunkRestoringError( + return Err(Error::OldChunkRestoringError( "Called finalize before all chunks were processed".to_string(), )); } @@ -188,7 +188,7 @@ impl<'db, S: StorageContext<'db>> Restorer { }; if root_hash != self.expected_root_hash { - return Err(Error::ChunkRestoringError(format!( + return Err(Error::OldChunkRestoringError(format!( "Proof did not match expected hash\n\tExpected: {:?}\n\tActual: {:?}", self.expected_root_hash, trunk.hash() @@ -419,7 +419,7 @@ mod tests { .unwrap(); } - let chunks = original.chunks().unwrap(); + let chunks = original.chunks_old().unwrap(); let storage = TempStorage::default(); let _tx2 = storage.start_transaction(); diff --git a/merk/src/merk/restore2.rs b/merk/src/merk/restore2.rs new file mode 100644 index 00000000..084f3759 --- /dev/null +++ b/merk/src/merk/restore2.rs @@ -0,0 +1,195 @@ +// TODO: add license + +//! Provides `Restorer`, which can create a replica of a Merk instance by +//! receiving chunk proofs. 
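+//!
+//! Each incoming chunk is identified by its traversal instruction; the
+//! restorer checks the reconstructed subtree against the root hash expected
+//! at that position (tracked in `chunk_id_to_root_hash`) before writing it
+//! to storage.
+//!
+//! A minimal sketch of the intended flow, mirroring `restoration_test` below
+//! (assumes a populated source Merk and a freshly opened target Merk):
+//!
+//! ```ignore
+//! let mut chunk_producer = ChunkProducer::new(&source_merk)?;
+//! let mut restorer = Restorer::new(target_merk, source_merk.root_hash().unwrap());
+//! // chunk indexes start at 1 (the trunk); no byte limit here
+//! let multi_chunk = chunk_producer.multi_chunk_with_limit(1, None)?;
+//! restorer.process_multi_chunk(multi_chunk.chunk)?;
+//! ```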
+
+use std::collections::BTreeMap;
+
+use grovedb_storage::{Batch, StorageContext};
+
+use crate::{
+    merk::MerkSource,
+    proofs::{
+        chunk::{
+            chunk_op::ChunkOp,
+            error::ChunkError,
+            util::{traversal_instruction_as_string, write_to_vec},
+        },
+        tree::{execute, Child, Tree as ProofTree},
+        Node,
+    },
+    tree::{RefWalker, Tree},
+    CryptoHash, Error,
+    Error::{CostsError, EdError, StorageError},
+    Link, Merk,
+    TreeFeatureType::BasicMerk,
+};
+
+// TODO: add documentation
+pub struct Restorer<S> {
+    merk: Merk<S>,
+    chunk_id_to_root_hash: BTreeMap<String, CryptoHash>,
+}
+
+impl<'db, S: StorageContext<'db>> Restorer<S> {
+    // TODO: add documentation
+    pub fn new(merk: Merk<S>, expected_root_hash: CryptoHash) -> Self {
+        let mut chunk_id_to_root_hash = BTreeMap::new();
+        chunk_id_to_root_hash.insert(traversal_instruction_as_string(vec![]), expected_root_hash);
+
+        Self {
+            merk,
+            chunk_id_to_root_hash,
+        }
+    }
+
+    // TODO: add documentation
+    // what does the restorer process?
+    // it should be able to process single chunks, subtree chunks and multi chunks,
+    // right? or just one of them?
+    // I think it should process just multi chunks, at least for now
+    pub fn process_multi_chunk(
+        &mut self,
+        chunk: impl IntoIterator<Item = ChunkOp>,
+    ) -> Result<(), Error> {
+        // [chunk id, chunk]
+        // we use the chunk id to know what to verify against
+        let mut chunks = chunk.into_iter();
+
+        // TODO: clean this up, make an external function that peeks and asserts
+        let chunk_id_string = if let Some(ChunkOp::ChunkId(chunk_id)) = chunks.next() {
+            traversal_instruction_as_string(chunk_id)
+        } else {
+            return Err(Error::ChunkRestoringError(ChunkError::ExpectedChunkId));
+        };
+
+        // TODO: deal with unwrap
+        let expected_root_hash = self.chunk_id_to_root_hash.get(&chunk_id_string).unwrap();
+
+        if let Some(ChunkOp::Chunk(chunk)) = chunks.next() {
+            // todo: deal with error
+            let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(()))
+                .unwrap()
+                .unwrap();
+            debug_assert!(tree.hash().unwrap() == *expected_root_hash);
+            self.write_chunk(tree)?;
+        } else {
+            return Err(Error::ChunkRestoringError(ChunkError::ExpectedChunk));
+        }
+
+        Ok(())
+    }
+
+    /// Writes the data contained in `tree` (extracted from a verified chunk
+    /// proof) to RocksDB.
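+    /// Only key-value node variants (`KV`, `KVValueHash`,
+    /// `KVValueHashFeatureType`) are persisted; `Hash` and `KVHash` nodes
+    /// carry no key-value entry and are skipped.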
+ fn write_chunk(&mut self, tree: ProofTree) -> Result<(), Error> { + let mut batch = self.merk.storage.new_batch(); + + tree.visit_refs(&mut |proof_node| { + if let Some((mut node, key)) = match &proof_node.node { + Node::KV(key, value) => Some(( + Tree::new(key.clone(), value.clone(), None, BasicMerk).unwrap(), + key, + )), + Node::KVValueHash(key, value, value_hash) => Some(( + Tree::new_with_value_hash(key.clone(), value.clone(), *value_hash, BasicMerk) + .unwrap(), + key, + )), + Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => Some(( + Tree::new_with_value_hash( + key.clone(), + value.clone(), + *value_hash, + *feature_type, + ) + .unwrap(), + key, + )), + _ => None, + } { + // TODO: encode tree node without cloning key/value + // *node.slot_mut(true) = proof_node.left.as_ref().map(Child::as_link); + // *node.slot_mut(false) = proof_node.right.as_ref().map(Child::as_link); + + let bytes = node.encode(); + batch.put(key, &bytes, None, None).map_err(CostsError) + } else { + Ok(()) + } + })?; + + self.merk + .storage + .commit_batch(batch) + .unwrap() + .map_err(StorageError) + } +} + +#[cfg(test)] +mod tests { + use grovedb_path::SubtreePath; + use grovedb_storage::{rocksdb_storage::test_utils::TempStorage, Storage}; + + use super::*; + use crate::{merk::chunks2::ChunkProducer, test_utils::make_batch_seq, Merk}; + + #[test] + fn restoration_test() { + // Create source merk and populate + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut original = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + let batch = make_batch_seq(0..15); + original + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(original.height(), Some(4)); + + // Create to be restored merk + let storage = TempStorage::new(); + let tx2 = storage.start_transaction(); + let restored_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx2) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + assert_eq!(restored_merk.height(), None); + + // assert initial conditions + assert_ne!( + original.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + + // Perform Restoration + let mut chunk_producer = + ChunkProducer::new(&original).expect("should create chunk producer"); + + let mut restorer = Restorer::new(restored_merk, original.root_hash().unwrap()); + + let chunk = chunk_producer + .multi_chunk_with_limit(1, None) + .expect("should generate chunk"); + + assert_eq!(chunk.next_index, None); + assert_eq!(chunk.remaining_limit, None); + assert_eq!(chunk.chunk.len(), 2); + + restorer.process_multi_chunk(chunk.chunk).unwrap(); + } +} diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 48afe8f3..d5ef376c 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -51,6 +51,18 @@ use crate::{ TreeFeatureType::BasicMerk, }; +mod binary_range; +#[cfg(feature = "full")] +// TODO: remove from here +pub mod chunk2; +#[cfg(feature = "full")] +// TODO: remove from here +pub mod util; +// TODO: remove from here +pub mod error; +// TODO: remove from here +pub mod chunk_op; + /// The minimum number of layers the trunk will be guaranteed to have before /// splitting into multiple chunks. If the tree's height is less than double /// this value, the trunk should be verified as a leaf chunk. 
@@ -264,14 +276,14 @@ pub(crate) fn verify_leaf>>( ) -> CostResult { execute(ops, false, |node| match node { Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) => Ok(()), - _ => Err(Error::ChunkRestoringError( + _ => Err(Error::OldChunkRestoringError( "Leaf chunks must contain full subtree".to_string(), )), }) .flat_map_ok(|tree| { tree.hash().map(|hash| { if hash != expected_hash { - Error::ChunkRestoringError(format!( + Error::OldChunkRestoringError(format!( "Leaf chunk proof did not match expected hash\n\tExpected: {:?}\n\tActual: \ {:?}", expected_hash, @@ -297,7 +309,7 @@ pub(crate) fn verify_trunk>>( Ok(match tree.child(true) { Some(child) => { if let Node::Hash(_) = child.tree.node { - return Err(Error::ChunkRestoringError( + return Err(Error::OldChunkRestoringError( "Expected height proof to only contain KV and KVHash nodes".to_string(), )); } @@ -323,7 +335,7 @@ pub(crate) fn verify_trunk>>( match tree.node { Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) => {} _ => { - return Err(Error::ChunkRestoringError( + return Err(Error::OldChunkRestoringError( "Expected trunk inner nodes to contain keys and values".to_string(), )) } @@ -333,14 +345,14 @@ pub(crate) fn verify_trunk>>( } else if !leftmost { match tree.node { Node::Hash(_) => Ok(()), - _ => Err(Error::ChunkRestoringError( + _ => Err(Error::OldChunkRestoringError( "Expected trunk leaves to contain Hash nodes".to_string(), )), } } else { match &tree.node { Node::KVHash(_) => Ok(()), - _ => Err(Error::ChunkRestoringError( + _ => Err(Error::OldChunkRestoringError( "Expected leftmost trunk leaf to contain KVHash node".to_string(), )), } @@ -363,7 +375,7 @@ pub(crate) fn verify_trunk>>( if trunk_height < MIN_TRUNK_HEIGHT { if !kv_only { - return Err(Error::ChunkRestoringError( + return Err(Error::OldChunkRestoringError( "Leaf chunks must contain full subtree".to_string(), )) .wrap_with_cost(cost); diff --git a/merk/src/proofs/chunk/binary_range.rs b/merk/src/proofs/chunk/binary_range.rs new file mode 100644 index 00000000..350c9718 --- /dev/null +++ b/merk/src/proofs/chunk/binary_range.rs @@ -0,0 +1,211 @@ +const LEFT: bool = true; +const RIGHT: bool = false; + +/// Utility type for range bisection and advancement +#[derive(Debug)] +pub(crate) struct BinaryRange { + start: usize, + end: usize, +} + +impl BinaryRange { + /// Returns a new BinaryRange and ensures that start < end + /// and min start value is 1 + pub fn new(start: usize, end: usize) -> Result { + // start should be less than or equal to end + if start > end { + return Err(String::from("start value cannot be greater than end value")); + } + + // the minimum value for start should be 1 + // that way the length of the maximum length + // of the range is usize::MAX and not + // usize::MAX + 1 + if start < 1 { + return Err(String::from( + "minimum start value should be 1 to avoid len overflow", + )); + } + + return Ok(Self { start, end }); + } + + /// Returns the len of the current range + pub fn len(&self) -> usize { + self.end - self.start + 1 + } + + /// Returns true when the len of the range is odd + pub fn odd(&self) -> bool { + (self.len() % 2) != 0 + } + + /// Determines if a value belongs to the left half or right half of a range + /// returns true for left and false for right + /// returns None if value is outside the range or range len is odd + pub fn which_half(&self, value: usize) -> Option { + // return None if value is not in the range + if value < self.start || value > self.end { + return None; + } + + // can't 
divide the range into equal halves + // when odd, so return None + if self.odd() { + return None; + } + + let half_size = self.len() / 2; + let second_half_start = self.start + half_size; + + if value >= second_half_start { + return Some(RIGHT); + } + + Some(LEFT) + } + + /// Returns a new range that only contains elements on the specified half + /// returns an error if range is not odd + pub fn get_half(&self, left: bool) -> Result { + if self.odd() { + return Err(String::from("cannot break odd range in half")); + } + + let half_size = self.len() / 2; + let second_half_start = self.start + half_size; + + return Ok(if left { + Self { + start: self.start, + end: second_half_start - 1, + } + } else { + Self { + start: second_half_start, + end: self.end, + } + }); + } + + /// Returns a new range that increments the start value + /// also return the previous start value + /// returns an error if the operation will cause start to be larger than end + pub fn advance_range_start(&self) -> Result<(Self, usize), String> { + // check if operation will cause start > end + if self.start == self.end { + return Err(String::from( + "can't advance start when start is equal to end", + )); + } + + Ok(( + Self { + start: self.start + 1, + end: self.end, + }, + self.start, + )) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn cannot_create_invalid_range() { + let invalid_range = BinaryRange::new(5, 3); + assert_eq!(invalid_range.is_err(), true); + } + + #[test] + fn can_get_range_len() { + let range = BinaryRange::new(2, 5).expect("should create range"); + assert_eq!(range.len(), 4); + assert_eq!(range.odd(), false); + + let range = BinaryRange::new(2, 2).expect("should create range"); + assert_eq!(range.len(), 1); + assert_eq!(range.odd(), true); + } + + #[test] + fn can_determine_correct_half() { + let range = BinaryRange::new(3, 7).expect("should create range"); + assert_eq!(range.len(), 5); + assert_eq!(range.odd(), true); + + // cannot determine half for value outside a range + assert_eq!(range.which_half(1).is_none(), true); + assert_eq!(range.which_half(7).is_none(), true); + + // cannot determine half when range is odd + assert_eq!(range.which_half(3).is_none(), true); + + let range = BinaryRange::new(3, 6).expect("should create range"); + assert_eq!(range.len(), 4); + assert_eq!(range.odd(), false); + + assert_eq!(range.which_half(3), Some(LEFT)); + assert_eq!(range.which_half(4), Some(LEFT)); + assert_eq!(range.which_half(5), Some(RIGHT)); + assert_eq!(range.which_half(6), Some(RIGHT)); + } + + #[test] + fn can_advance_start_range() { + let range = BinaryRange::new(2, 5).expect("should create range"); + assert_eq!(range.len(), 4); + assert_eq!(range.start, 2); + + // advance the range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 2); + assert_eq!(range.len(), 3); + assert_eq!(range.start, 3); + + // advance range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 3); + assert_eq!(range.len(), 2); + assert_eq!(range.start, 4); + + // advance range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 4); + assert_eq!(range.len(), 1); + assert_eq!(range.start, 5); + + // should not be allowed to advance the range anymore + let advance_result = range.advance_range_start(); + assert_eq!(advance_result.is_err(), true); + } + + #[test] + fn can_break_range_into_halves() { + let range = 
BinaryRange::new(2, 10).expect("should create range"); + assert_eq!(range.len(), 9); + assert_eq!(range.odd(), true); + assert_eq!(range.get_half(LEFT).is_err(), true); + + let range = BinaryRange::new(2, 11).expect("should create range"); + assert_eq!(range.len(), 10); + assert_eq!(range.odd(), false); + + let left_range = range.get_half(LEFT).expect("should get sub range"); + assert_eq!(left_range.start, 2); + assert_eq!(left_range.end, 6); + + let right_range = range.get_half(RIGHT).expect("should get sub range"); + assert_eq!(right_range.start, 7); + assert_eq!(right_range.end, 11); + + // right_range is false, advance to make even + let (right_range, prev) = right_range.advance_range_start().expect("should advance"); + let right_left_range = right_range.get_half(LEFT).expect("should get sub range"); + assert_eq!(right_left_range.len(), 2); + assert_eq!(right_left_range.start, 8); + assert_eq!(right_left_range.end, 9); + } +} diff --git a/merk/src/proofs/chunk/chunk2.rs b/merk/src/proofs/chunk/chunk2.rs new file mode 100644 index 00000000..a6072871 --- /dev/null +++ b/merk/src/proofs/chunk/chunk2.rs @@ -0,0 +1,620 @@ +use grovedb_costs::{cost_return_on_error, CostResult, CostsExt, OperationCost}; + +// TODO: add copyright comment +use crate::proofs::{Node, Op, Tree}; +use crate::{ + proofs::{chunk::error::ChunkError, tree::execute}, + tree::{Fetch, RefWalker}, + CryptoHash, Error, +}; + +pub const LEFT: bool = true; +pub const RIGHT: bool = false; + +impl<'a, S> RefWalker<'a, S> +where + S: Fetch + Sized + Clone, +{ + /// Returns a chunk of a given depth from a RefWalker + pub fn create_chunk(&mut self, depth: usize) -> Result, Error> { + // build the proof vector + let mut proof = vec![]; + + self.create_chunk_internal(&mut proof, depth)?; + + Ok(proof) + } + + fn create_chunk_internal( + &mut self, + proof: &mut Vec, + remaining_depth: usize, + ) -> Result<(), Error> { + // at some point we will reach the depth + // here we need to put the node hash + if remaining_depth == 0 { + proof.push(Op::Push(self.to_hash_node().unwrap())); + return Ok(()); + } + + // traverse left + let has_left_child = self.tree().link(true).is_some(); + if has_left_child { + let mut left = self.walk(true).unwrap()?.expect("confirmed is some"); + left.create_chunk_internal(proof, remaining_depth - 1)?; + } + + // add current node's data + proof.push(Op::Push(self.to_kv_value_hash_feature_type_node())); + + if has_left_child { + proof.push(Op::Parent); + } + + // traverse right + if let Some(mut right) = self.walk(false).unwrap()? { + right.create_chunk_internal(proof, remaining_depth - 1)?; + + proof.push(Op::Child); + } + + Ok(()) + } + + /// Returns a chunk of a given depth after applying some traversal + /// instruction to the RefWalker + pub fn traverse_and_build_chunk( + &mut self, + instructions: &[bool], + depth: usize, + ) -> Result, Error> { + // base case + if instructions.len() == 0 { + // we are at the desired node + return self.create_chunk(depth); + } + + // link must exist + let has_link = self.tree().link(instructions[0]).is_some(); + if !has_link { + return Err(Error::ChunkingError(ChunkError::BadTraversalInstruction( + "no node found at given traversal instruction", + ))); + } + + // grab child + let mut child = self + .walk(instructions[0]) + .unwrap()? 
+ .expect("confirmed link exists so cannot be none"); + + // recurse on child + child.traverse_and_build_chunk(&instructions[1..], depth) + } + + /// Returns the smallest amount of tree ops, that can convince + /// a verifier of the tree height + /// the generated subtree is of this form + /// kv_hash + /// / \ + /// kv_hash node_hash + /// / \ + /// kv_hash node_hash + /// . + /// . + /// . + pub fn generate_height_proof(&mut self, proof: &mut Vec) -> CostResult<(), Error> { + // TODO: look into making height proofs more efficient + // they will always be used in the context of some + // existing chunk, we don't want to repeat nodes unnecessarily + let mut cost = OperationCost::default(); + + let maybe_left = cost_return_on_error!(&mut cost, self.walk(LEFT)); + let has_left_child = maybe_left.is_some(); + + // recurse to leftmost element + if let Some(mut left) = maybe_left { + cost_return_on_error!(&mut cost, left.generate_height_proof(proof)) + } + + proof.push(Op::Push(self.to_kvhash_node())); + + if has_left_child { + proof.push(Op::Parent); + } + + if let Some(right) = self.tree().link(RIGHT) { + proof.push(Op::Push(Node::Hash(*right.hash()))); + proof.push(Op::Child); + } + + Ok(()).wrap_with_cost(cost) + } +} + +// TODO: add documentation +pub fn verify_height_proof(proof: Vec, expected_root_hash: CryptoHash) -> Result { + // todo: remove unwrap + let height_proof_tree = execute(proof.into_iter().map(Ok), false, |_| Ok(())).unwrap()?; + + // todo: deal with cost + // todo: deal with old chunk restoring error + if height_proof_tree.hash().unwrap() != expected_root_hash { + return Err(Error::OldChunkRestoringError( + "invalid height proof: root hash mismatch".to_string(), + )); + } + + verify_height_tree(&height_proof_tree) +} + +// TODO: add documentation +pub fn verify_height_tree(height_proof_tree: &Tree) -> Result { + return Ok(match height_proof_tree.child(LEFT) { + Some(child) => { + if !matches!(child.tree.node, Node::KVHash(..)) { + // todo deal with old chunk restoring error + return Err(Error::OldChunkRestoringError( + "Expected left nodes in height proofs to be kvhash nodes".to_string(), + )); + } + verify_height_tree(&child.tree)? 
+ 1 + } + None => 1, + }); +} + +#[cfg(test)] +pub mod tests { + use ed::Encode; + + use crate::{ + proofs::{ + chunk::chunk2::{verify_height_proof, LEFT, RIGHT}, + tree::execute, + Node, Op, + Op::Parent, + }, + test_utils::{make_tree_seq, make_tree_seq_with_start_key}, + tree::{RefWalker, Tree}, + CryptoHash, PanicSource, TreeFeatureType, + }; + + fn build_tree_10_nodes() -> Tree { + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // 4 6 9 + make_tree_seq_with_start_key(10, [0; 8].to_vec()) + } + + /// Traverses a tree to a certain node and returns the node hash of that + /// node + pub fn traverse_get_node_hash( + mut walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + return traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_hash_node().unwrap() + }); + } + + /// Traverses a tree to a certain node and returns the kv_feature_type of + /// that node + pub fn traverse_get_kv_feature_type( + mut walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + return traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_kv_value_hash_feature_type_node() + }); + } + /// Traverses a tree to a certain node and returns the kv_hash of + /// that node + pub fn traverse_get_kv_hash( + mut walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + return traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_kvhash_node() + }); + } + + /// Traverses a tree to a certain node and returns the result of applying + /// some arbitrary function + pub fn traverse_and_apply( + mut walker: &mut RefWalker, + traverse_instructions: &[bool], + apply_fn: T, + ) -> Node + where + T: Fn(&mut RefWalker) -> Node, + { + if traverse_instructions.is_empty() { + return apply_fn(walker); + } + + let mut child = walker + .walk(traverse_instructions[0]) + .unwrap() + .unwrap() + .unwrap(); + return traverse_and_apply(&mut child, &traverse_instructions[1..], apply_fn); + } + + #[test] + fn build_chunk_from_root_depth_0() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // should return the node hash of the root node + let chunk = tree_walker.create_chunk(0).expect("should build chunk"); + assert_eq!(chunk.len(), 1); + assert_eq!( + chunk[0], + Op::Push(traverse_get_node_hash(&mut tree_walker, &[])) + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_1() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk for depth 1 + // expected: + // 3 + // / \ + // Hash(1) Hash(7) + let chunk = tree_walker.create_chunk(1).expect("should build chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT])), + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_3() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // 
build chunk for depth 3 + // expected: + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // H(4) H(6) H(9) + let chunk = tree_walker.create_chunk(3).expect("should build chunk"); + assert_eq!(chunk.len(), 19); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_max_depth() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk for entire tree (depth 4) + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // 4 6 9 + let chunk = tree_walker.create_chunk(4).expect("should build chunk"); + assert_eq!(chunk.len(), 19); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn chunk_greater_than_max_should_equal_max_depth() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk with depth greater than tree + // we should get the same result as building with the exact depth + let large_depth_chunk = tree_walker.create_chunk(100).expect("should build chunk"); + let exact_depth_chunk = tree_walker.create_chunk(4).expect("should build chunk"); + assert_eq!(large_depth_chunk, exact_depth_chunk); + + let tree_a = execute(large_depth_chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + 
.expect("should reconstruct tree"); + let tree_b = execute(exact_depth_chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree_a.hash().unwrap(), tree_b.hash().unwrap()); + } + + #[test] + fn build_chunk_after_traversal_depth_2() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // traverse to the right first then build chunk + // expected + // 7 + // / \ + // 5 8 + // / \ \ + // H(4) H(6) H(9) + + // right traversal + let chunk = tree_walker + .traverse_and_build_chunk(&[RIGHT], 2) + .expect("should build chunk"); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + ] + ); + + // the hash of the tree computed from the chunk + // should be the same as the node_hash of the element + // on the right + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!( + Node::Hash(computed_tree.hash().unwrap()), + traverse_get_node_hash(&mut tree_walker, &[RIGHT]) + ); + } + + #[test] + fn build_chunk_after_traversal_depth_1() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // traverse with [right, left] and then build chunk of depth 1 + // expected + // 5 + // / \ + // H(4) H(6) + + // instruction traversal + let chunk = tree_walker + .traverse_and_build_chunk(&[RIGHT, LEFT], 1) + .expect("should build chunk"); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!( + Node::Hash(computed_tree.hash().unwrap()), + traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT]) + ); + } + + #[test] + fn test_chunk_encoding() { + let chunk = vec![ + Op::Push(Node::Hash([0; 32])), + Op::Push(Node::KVValueHashFeatureType( + vec![1], + vec![2], + [0; 32], + TreeFeatureType::BasicMerk, + )), + ]; + let encoded_chunk = chunk.encode().expect("should encode"); + assert_eq!(encoded_chunk.len(), 33 + 39); + assert_eq!( + encoded_chunk.len(), + chunk.encoding_length().expect("should get encoding length") + ); + } + + #[test] + fn test_height_proof_generation() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + let mut height_proof = vec![]; + tree_walker + .generate_height_proof(&mut height_proof) + .unwrap() + .expect("should generate height proof"); + + assert_eq!(height_proof.len(), 9); + assert_eq!( + height_proof, + vec![ + Op::Push(traverse_get_kv_hash(&mut tree_walker, &[LEFT, LEFT])), + Op::Push(traverse_get_kv_hash(&mut tree_walker, &[LEFT])), + Op::Parent, + 
Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])),
+ Op::Child,
+ Op::Push(traverse_get_kv_hash(&mut tree_walker, &[])),
+ Op::Parent,
+ Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT])),
+ Op::Child,
+ ]
+ );
+ }
+
+ #[test]
+ fn test_height_proof_verification() {
+ let mut tree = build_tree_10_nodes();
+ let mut tree_walker = RefWalker::new(&mut tree, PanicSource {});
+
+ let mut height_proof = vec![];
+ tree_walker
+ .generate_height_proof(&mut height_proof)
+ .unwrap()
+ .expect("should generate height proof");
+
+ let verified_height = verify_height_proof(height_proof, tree.hash().unwrap())
+ .expect("should verify height proof");
+
+ // the height proof follows the leftmost path (3 -> 1 -> 0), so the
+ // verified height is 3 here; it doesn't represent the max height of
+ // the tree, whose deepest leaves are at depth 4
+ assert_eq!(verified_height, 3);
+ }
+}
diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs
new file mode 100644
index 00000000..29687932
--- /dev/null
+++ b/merk/src/proofs/chunk/chunk_op.rs
@@ -0,0 +1,141 @@
+use std::io::{Read, Write};
+
+use ed::{Decode, Encode};
+use integer_encoding::{VarInt, VarIntReader};
+
+use crate::proofs::Op;
+
+/// A single item in a multi-chunk proof: either the id of the chunk that
+/// follows (encoded as a traversal instruction) or the chunk's tree ops
+#[derive(PartialEq, Debug)]
+pub enum ChunkOp {
+ ChunkId(Vec<bool>),
+ Chunk(Vec<Op>),
+}
+
+impl Encode for ChunkOp {
+ fn encode_into<W: Write>(&self, dest: &mut W) -> ed::Result<()> {
+ match self {
+ Self::ChunkId(instruction) => {
+ // write the marker then the len
+ dest.write_all(&[0_u8])?;
+ dest.write_all(instruction.len().encode_var_vec().as_slice())?;
+ let instruction_as_binary: Vec<u8> = instruction
+ .iter()
+ .map(|v| if *v { 1_u8 } else { 0_u8 })
+ .collect();
+ dest.write_all(&instruction_as_binary)?;
+ }
+ Self::Chunk(chunk) => {
+ dest.write_all(&[1_u8])?;
+ // the chunk len represents the number of ops, not the total
+ // encoding length of the ops
+ dest.write_all(chunk.len().encode_var_vec().as_slice())?;
+ for op in chunk {
+ dest.write_all(&op.encode()?)?;
+ }
+ }
+ }
+
+ Ok(())
+ }
+
+ fn encoding_length(&self) -> ed::Result<usize> {
+ Ok(match self {
+ Self::ChunkId(instruction) => {
+ 1 + instruction.len().encode_var_vec().len() + instruction.len()
+ }
+ Self::Chunk(chunk) => {
+ 1 + chunk.len().encode_var_vec().len() + chunk.encoding_length()?
+ }
+ })
+ }
+}
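+
+// Wire-format sketch (illustrative): ChunkOp::ChunkId(vec![LEFT, RIGHT])
+// encodes to [0x00, 0x02, 0x01, 0x00], i.e. the 0x00 marker, a varint
+// instruction count, then one byte per direction (1 = left, 0 = right).
+// A ChunkOp::Chunk starts with the 0x01 marker and a varint op count,
+// followed by each op's own encoding; see the tests below.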
+
+impl Decode for ChunkOp {
+ fn decode<R: Read>(input: R) -> ed::Result<Self> {
+ let mut chunk_op = ChunkOp::ChunkId(vec![]);
+ Self::decode_into(&mut chunk_op, input)?;
+ Ok(chunk_op)
+ }
+
+ fn decode_into<R: Read>(&mut self, mut input: R) -> ed::Result<()> {
+ let mut marker = [0_u8; 1];
+ input.read_exact(&mut marker)?;
+
+ match marker[0] {
+ 0 => {
+ let length = input.read_varint()?;
+ let mut instruction_as_binary = vec![0_u8; length];
+ input.read_exact(&mut instruction_as_binary)?;
+
+ let instruction: Vec<bool> = instruction_as_binary
+ .into_iter()
+ .map(|v| v == 1_u8)
+ .collect();
+
+ *self = ChunkOp::ChunkId(instruction);
+ }
+ 1 => {
+ let ops_length = input.read_varint()?;
+ let mut chunk = Vec::with_capacity(ops_length);
+
+ for _ in 0..ops_length {
+ let op = Decode::decode(&mut input)?;
+ chunk.push(op);
+ }
+
+ *self = ChunkOp::Chunk(chunk);
+ }
+ _ => return Err(ed::Error::UnexpectedByte(marker[0])),
+ }
+
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use ed::{Decode, Encode};
+
+ use crate::proofs::{
+ chunk::{
+ chunk2::{LEFT, RIGHT},
+ chunk_op::ChunkOp,
+ },
+ Node, Op,
+ };
+
+ #[test]
+ fn test_chunk_op_encoding() {
+ let chunk_op = ChunkOp::ChunkId(vec![LEFT, RIGHT]);
+ let encoded_chunk_op = chunk_op.encode().unwrap();
+ assert_eq!(encoded_chunk_op, vec![0, 2, 1, 0]);
+ assert_eq!(encoded_chunk_op.len(), chunk_op.encoding_length().unwrap());
+
+ let chunk_op = ChunkOp::Chunk(vec![Op::Push(Node::Hash([0; 32])), Op::Child]);
+ let encoded_chunk_op = chunk_op.encode().unwrap();
+ let mut expected_encoding = vec![1, 2];
+ expected_encoding.extend(Op::Push(Node::Hash([0; 32])).encode().unwrap());
+ expected_encoding.extend(Op::Child.encode().unwrap());
+ assert_eq!(encoded_chunk_op, expected_encoding);
+ assert_eq!(encoded_chunk_op.len(), chunk_op.encoding_length().unwrap());
+ }
+
+ #[test]
+ fn test_chunk_op_decoding() {
+ let encoded_chunk_op = vec![0, 3, 1, 0, 1];
+ let decoded_chunk_op = ChunkOp::decode(encoded_chunk_op.as_slice()).unwrap();
+ assert_eq!(decoded_chunk_op, ChunkOp::ChunkId(vec![LEFT, RIGHT, LEFT]));
+
+ let mut encoded_chunk_op = vec![1, 2];
+ encoded_chunk_op.extend(Op::Push(Node::Hash([1; 32])).encode().unwrap());
+ encoded_chunk_op.extend(Op::Push(Node::KV(vec![1], vec![2])).encode().unwrap());
+ let decoded_chunk_op = ChunkOp::decode(encoded_chunk_op.as_slice()).unwrap();
+ assert_eq!(
+ decoded_chunk_op,
+ ChunkOp::Chunk(vec![
+ Op::Push(Node::Hash([1; 32])),
+ Op::Push(Node::KV(vec![1], vec![2]))
+ ])
+ );
+ }
+}
diff --git a/merk/src/proofs/chunk/error.rs b/merk/src/proofs/chunk/error.rs
new file mode 100644
index 00000000..0c926203
--- /dev/null
+++ b/merk/src/proofs/chunk/error.rs
@@ -0,0 +1,32 @@
+#[derive(Debug, thiserror::Error)]
+/// Chunk related errors
+pub enum ChunkError {
+ /// Limit too small for first chunk, cannot make progress
+ #[error("limit too small {0}")]
+ LimitTooSmall(&'static str),
+
+ /// Chunk index out of bounds
+ #[error("chunk index out of bounds: {0}")]
+ OutOfBounds(&'static str),
+
+ /// Empty tree contains no chunks
+ #[error("chunk from empty tree: {0}")]
+ EmptyTree(&'static str),
+
+ /// Invalid traversal instruction (points to no element)
+ #[error("traversal instruction invalid {0}")]
+ BadTraversalInstruction(&'static str),
+
+ /// Expected ChunkId when parsing chunk ops
+ #[error("expected chunk id when parsing chunk op")]
+ ExpectedChunkId,
+
+ /// Expected Chunk when parsing chunk ops
+ #[error("expected chunk when parsing chunk op")]
+ ExpectedChunk,
+
+ /// Internal error, this should never surface;
+ /// if it does, it means a wrong assumption in the code
+ #[error("internal error {0}")]
+ InternalError(&'static str),
+}
diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs
new file mode 100644
index 00000000..1094e50f
--- /dev/null
+++ b/merk/src/proofs/chunk/util.rs
@@ -0,0 +1,432 @@
+// TODO: add MIT License
+// TODO: add module description
+
+use std::io::Write;
+
+// TODO: figure out better nomenclature
+use crate::{proofs::chunk::binary_range::BinaryRange, Error};
+use crate::{proofs::chunk::error::ChunkError, Error::InternalError};
+
+/// Breaks a tree of the given height into layers and returns the chunk
+/// height of each layer
+fn chunk_height_per_layer(height: usize) -> Vec<usize> {
+ // every chunk has a fixed height of 2;
+ // it is possible for a chunk to not reach full capacity
+ let mut two_count = height / 2;
+ if height % 2 != 0 {
+ two_count += 1;
+ }
+
+ vec![2; two_count]
+}
+
+/// Represents the height as a linear combination of 3 and 2
+/// of the form 3x + 2y;
+/// this breaks the tree into layers of height 3 or 2.
+/// The minimum chunk height is 2, so if the tree height is less than 2
+/// we just return a single layer of height 2
+fn chunk_height_per_layer_lin_comb(height: usize) -> Vec<usize> {
+ let mut two_count = 0;
+ let mut three_count = height / 3;
+
+ // minimum chunk height is 2, if tree height is less than 2
+ // return a single layer with chunk height 2
+ if height < 2 {
+ two_count = 1;
+ } else {
+ match height % 3 {
+ 0 => { /* do nothing */ }
+ 1 => {
+ // reduce the three_count by 1
+ // so the remainder becomes 3 + 1
+ // which is equivalent to 2 + 2
+ three_count -= 1;
+ two_count += 2;
+ }
+ 2 => {
+ // remainder is a factor of 2
+ // just increase the two_count
+ two_count += 1;
+ }
+ // this is unreachable because height is a positive number;
+ // the remainder after dividing by 3 is fixed to [0,1,2]
+ _ => unreachable!(""),
+ }
+ }
+
+ let mut layer_heights = vec![3; three_count];
+ layer_heights.extend(vec![2; two_count]);
+
+ layer_heights
+}
+
+/// Return the layer a chunk subtree belongs to
+pub fn chunk_layer(height: usize, chunk_id: usize) -> Result<usize, Error> {
+ // the remaining depth tells us how deep in the tree the specified chunk is
+ let mut remaining_depth = generate_traversal_instruction(height, chunk_id)?.len() + 1;
+ let layer_heights = chunk_height_per_layer(height);
+
+ let mut layer = 1;
+
+ while remaining_depth > 1 {
+ // if it is not already 1, the remaining depth will always be larger
+ // than the next layer height, because every chunk starts at a layer
+ // boundary and the remaining depth points to a chunk
+ debug_assert!(remaining_depth > layer_heights[layer - 1]);
+
+ remaining_depth -= layer_heights[layer - 1];
+ layer += 1;
+ }
+
+ Ok(layer - 1)
+}
+
+/// Return the height of a chunk given the tree height
+/// and the chunk id
+pub fn chunk_height(height: usize, chunk_id: usize) -> Result<usize, Error> {
+ let chunk_layer = chunk_layer(height, chunk_id)?;
+ let layer_heights = chunk_height_per_layer(height);
+
+ Ok(layer_heights[chunk_layer])
+}
+
+/// Given a tree of height h, return the number of chunks needed
+/// to completely represent the tree
+pub fn number_of_chunks(height: usize) -> usize {
+ let layer_heights = chunk_height_per_layer(height);
+ number_of_chunks_internal(layer_heights)
+}
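+
+// Illustrative closed form (assuming the fixed layer height of 2 above):
+// each chunk fans out into 4 child chunks, so a tree of height 2k holds
+// 1 + 4 + 4^2 + ... + 4^(k-1) = (4^k - 1) / 3 chunks in total, e.g.
+// number_of_chunks(10) = (4^5 - 1) / 3 = 341. The functions below derive
+// the same counts layer by layer.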
+
+/// Locates the subtree represented by a chunk id and returns
+/// the number of chunks under that subtree
+pub fn number_of_chunks_under_chunk_id(height: usize, chunk_id: usize) -> Result<usize, Error> {
+ let chunk_layer = chunk_layer(height, chunk_id)?;
+ let layer_heights = chunk_height_per_layer(height);
+
+ // we only care about the layer heights after the chunk layer,
+ // as we are getting the number of chunks under a subtree and not
+ // the entire tree of height h
+ Ok(number_of_chunks_internal(
+ layer_heights[chunk_layer..].to_vec(),
+ ))
+}
+
+/// Given the chunk heights of a tree per layer, return the total number of
+/// chunks in that tree
+fn number_of_chunks_internal(layer_heights: Vec<usize>) -> usize {
+ // a layer consists of 1 or more subtrees of a given height;
+ // here we figure out the number of exit nodes from a single subtree on each layer
+ let mut single_subtree_exits_per_layer = layer_heights
+ .into_iter()
+ .map(exit_node_count)
+ .collect::<Vec<usize>>();
+
+ // we don't care about exit nodes from the last layer
+ // as they point to non-existent subtrees
+ single_subtree_exits_per_layer.pop();
+
+ // now we get the total exit nodes per layer
+ // by multiplying the exits per subtree with the number of subtrees on that
+ // layer
+ let mut chunk_counts_per_layer = vec![1];
+ for i in 0..single_subtree_exits_per_layer.len() {
+ let previous_layer_chunk_count = chunk_counts_per_layer[i];
+ let current_layer_chunk_count =
+ previous_layer_chunk_count * single_subtree_exits_per_layer[i];
+ chunk_counts_per_layer.push(current_layer_chunk_count);
+ }
+
+ chunk_counts_per_layer.into_iter().sum()
+}
+
+/// Calculates the maximum number of exit nodes for a tree of height h
+fn exit_node_count(height: usize) -> usize {
+ 2_usize.pow(height as u32)
+}
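+
+// Worked example (illustrative): for height 10 the layer heights are
+// [2, 2, 2, 2, 2], so number_of_chunks_under_chunk_id(10, 2) drops the
+// first layer and sums 1 + 4 + 16 + 64 = 85 chunks, matching the tests
+// at the bottom of this file.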
+
+/// Generate the instruction for traversing to a given chunk in a binary tree
+pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result<Vec<bool>, Error> {
+ let mut instructions = vec![];
+
+ let total_chunk_count = number_of_chunks(height);
+
+ // out of bounds
+ if chunk_id < 1 || chunk_id > total_chunk_count {
+ return Err(Error::ChunkingError(ChunkError::OutOfBounds(
+ "chunk id out of bounds",
+ )));
+ }
+
+ let mut chunk_range = BinaryRange::new(1, total_chunk_count).map_err(|_| {
+ Error::ChunkingError(ChunkError::InternalError(
+ "failed to initialize chunk range",
+ ))
+ })?;
+
+ // the total chunk count will always be odd: starting from the initial
+ // chunk (1) we add an even number of exit nodes, and those in turn have
+ // even numbers of exit nodes ...
+ // so total_chunk_count = 1 + some_even_number = odd
+ debug_assert!(chunk_range.odd());
+
+ // bisect and reduce the chunk range until we get to the desired chunk;
+ // we keep track of every left/right decision we make
+ while chunk_range.len() > 1 {
+ if chunk_range.odd() {
+ // checks if the last decision we made got us to the desired chunk id
+ let advance_result = chunk_range.advance_range_start().unwrap();
+ chunk_range = advance_result.0;
+ if advance_result.1 == chunk_id {
+ return Ok(instructions);
+ }
+ } else {
+ // for an even chunk range, we are at a decision point:
+ // we can either go left or right;
+ // we first check which half the desired chunk is in,
+ // then follow that path
+ let chunk_id_half = chunk_range
+ .which_half(chunk_id)
+ .expect("chunk id must exist in range");
+ instructions.push(chunk_id_half);
+ chunk_range = chunk_range
+ .get_half(chunk_id_half)
+ .expect("confirmed range is not odd");
+ }
+ }
+
+ // the chunk range len is exactly 1,
+ // so this must be the desired chunk id;
+ // return the instructions that got us here
+ Ok(instructions)
+}
+
+/// Convert a traversal instruction to a byte string where
+/// 1 represents left and
+/// 0 represents right
+pub fn traversal_instruction_as_string(instruction: Vec<bool>) -> String {
+ instruction
+ .iter()
+ .map(|v| if *v { "1" } else { "0" })
+ .collect()
+}
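+
+// Usage sketch (illustrative, mirrors the tests below): in a tree of
+// height 4 there are 5 chunks, and chunk 4 sits right-then-left of the root:
+//
+// let instruction = generate_traversal_instruction(4, 4)?; // [RIGHT, LEFT]
+// assert_eq!(traversal_instruction_as_string(instruction), "01");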
+
+// TODO: move this to a better file
+pub fn write_to_vec<W: Write>(dest: &mut W, value: &[u8]) -> Result<(), Error> {
+ dest.write_all(value)
+ .map_err(|_e| InternalError("failed to write to vector"))
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::proofs::chunk::chunk2::{LEFT, RIGHT};
+
+ #[test]
+ fn test_chunk_height_per_layer() {
+ let layer_heights = chunk_height_per_layer(10);
+ assert_eq!(layer_heights.iter().sum::<usize>(), 10);
+ assert_eq!(layer_heights, [2, 2, 2, 2, 2]);
+
+ // odd heights round up to the next even number
+ let layer_heights = chunk_height_per_layer(45);
+ assert_eq!(layer_heights.iter().sum::<usize>(), 46);
+ assert_eq!(layer_heights, [2; 23]);
+
+ let layer_heights = chunk_height_per_layer(2);
+ assert_eq!(layer_heights.iter().sum::<usize>(), 2);
+ assert_eq!(layer_heights, [2]);
+
+ // height less than 2
+ let layer_heights = chunk_height_per_layer(1);
+ assert_eq!(layer_heights.iter().sum::<usize>(), 2);
+ assert_eq!(layer_heights, [2]);
+
+ let layer_heights = chunk_height_per_layer(0);
+ assert_eq!(layer_heights.iter().sum::<usize>(), 0);
+ assert_eq!(layer_heights, Vec::<usize>::new());
+ }
+
+ #[test]
+ fn test_exit_node_count() {
+ // a tree with just one node has 2 exit nodes
+ assert_eq!(exit_node_count(1), 2);
+
+ // a tree with height 2 has 4 exit nodes
+ assert_eq!(exit_node_count(2), 4);
+
+ // a tree with height 6 has 64 exit nodes
+ assert_eq!(exit_node_count(6), 64);
+ }
+
+ #[test]
+ fn test_number_of_chunks() {
+ // given a tree of height less than 3, the chunk count should be 1
+ assert_eq!(number_of_chunks(1), 1);
+ assert_eq!(number_of_chunks(2), 1);
+
+ // a tree with height 4 should have 5 chunks:
+ // we split the tree into 2 layers of chunk height 2 each;
+ // the first layer contains just one chunk (1), but has 4 exit nodes,
+ // hence total chunk count = 1 + 4 = 5
+ assert_eq!(number_of_chunks(4), 5);
+
+ // a tree with height 6 should have 21 chunks:
+ // it will be split into three layers of chunk height 2 = [2,2,2];
+ // the first chunk takes 1 and has 2^2 = 4 exit nodes,
+ // the second layer takes 4, with each having 2^2 exit nodes,
+ // total exits from the second layer = 4 * 4 = 16,
+ // total chunks = 1 + 4 + 16 = 21
+ assert_eq!(number_of_chunks(6), 21);
+
+ // a tree with height 10 should have 341 chunks:
+ // it will be split into 5 layers = [2,2,2,2,2]
+ // first layer has just 1 chunk, exit nodes = 2^2 = 4
+ // second layer has 4 chunks, exit nodes = 2^2 * 4 = 16
+ // third layer has 16 chunks, exit nodes = 2^2 * 16 = 64
+ // fourth layer has 64 chunks, exit nodes = 2^2 * 64 = 256
+ // fifth layer has 256 chunks
+ // total chunks = 1 + 4 + 16 + 64 + 256 = 341 chunks
+ assert_eq!(number_of_chunks(10), 341);
+ }
+
+ #[test]
+ fn test_number_of_chunks_under_chunk_id() {
+ // a tree with height less than 3 should have just 1 chunk
+ assert_eq!(number_of_chunks_under_chunk_id(1, 1).unwrap(), 1);
+ assert_eq!(number_of_chunks_under_chunk_id(2, 1).unwrap(), 1);
+
+ // asking for a chunk out of bounds should return an error
+ assert!(number_of_chunks_under_chunk_id(1, 3).is_err());
+
+ // a tree with height 4 should have 5 chunks at chunk id 1
+ // but 1 chunk each at ids 2 - 5
+ assert_eq!(number_of_chunks_under_chunk_id(4, 1).unwrap(), 5);
+ assert_eq!(number_of_chunks_under_chunk_id(4, 2).unwrap(), 1);
+ assert_eq!(number_of_chunks_under_chunk_id(4, 3).unwrap(), 1);
+ assert_eq!(number_of_chunks_under_chunk_id(4, 4).unwrap(), 1);
+ assert_eq!(number_of_chunks_under_chunk_id(4, 5).unwrap(), 1);
+
+ // a tree with height 10 should have 341 chunks
+ // layer_heights = [2, 2, 2, 2, 2]
+ // chunk_id 1 = 341
+ // chunk_id 2 = 85 i.e. (341 - 1) / 2^2
+ // chunk_id 3 = 21 i.e. (85 - 1) / 2^2
+ // chunk_id 4 = 5 i.e. (21 - 1) / 2^2
+ // chunk_id 5 = 1 i.e. (5 - 1) / 2^2
+ // chunk_id 6 = 1, on the same layer as 5
+ // chunk_id 87 = 85, as chunk 87 wraps back to the same layer as chunk_id 2
+ // chunk_id 88 = 21, mirrors chunk_id 3
+ // chunk_id 89 = 5, mirrors chunk_id 4
+ // chunk_id 90 = 1, mirrors chunk_id 5
+ assert_eq!(number_of_chunks_under_chunk_id(10, 1).unwrap(), 341);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 2).unwrap(), 85);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 3).unwrap(), 21);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 4).unwrap(), 5);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 5).unwrap(), 1);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 6).unwrap(), 1);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 87).unwrap(), 85);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 88).unwrap(), 21);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 89).unwrap(), 5);
+ assert_eq!(number_of_chunks_under_chunk_id(10, 90).unwrap(), 1);
+ }
+
+ #[test]
+ fn test_traversal_instruction_generation() {
+ // 3
+ // / \
+ // 1 7
+ // / \ / \
+ // 0 2 5 8
+ // / \ \
+ // 4 6 9
+ // height: 4
+ // layer_height: 2, 2
+ // 3
+ // / \
+ // 1 7
+ // / \ / \
+ // 0 2 5 8
+ // ............................
+ // / \ \
+ // 4 6 9
+ // 5 chunks
+ // chunk 1 entry - 3
+ // chunk 2 entry - 0
+ // chunk 3 entry - 2
+ // chunk 4 entry - 5
+ // chunk 5 entry - 8
+
+ // chunk 1 entry - 3 is at the top of the tree, so empty instruction set
+ let instruction =
+ generate_traversal_instruction(4, 1).expect("should generate traversal instruction");
+ let empty_instruction: &[bool] = &[];
+ assert_eq!(instruction, empty_instruction);
+
+ // chunk 2 entry - 0
+ // go left twice from the root i.e. 3 left -> 1 left -> 0
+ let instruction =
+ generate_traversal_instruction(4, 2).expect("should generate traversal instruction");
+ assert_eq!(instruction, &[LEFT, LEFT]);
+
+ // chunk 3 entry - 2
+ // go left then right from the root i.e. 3 left -> 1 right -> 2
+ let instruction =
+ generate_traversal_instruction(4, 3).expect("should generate traversal instruction");
+ assert_eq!(instruction, &[LEFT, RIGHT]);
+
+ // chunk 4 entry - 5
+ // go right then left i.e. 3 right -> 7 left -> 5
+ let instruction =
+ generate_traversal_instruction(4, 4).expect("should generate traversal instruction");
+ assert_eq!(instruction, &[RIGHT, LEFT]);
+
+ // chunk 5 entry - 8
+ // go right twice i.e. 3 right -> 7 right -> 8
+ let instruction =
+ generate_traversal_instruction(4, 5).expect("should generate traversal instruction");
+ assert_eq!(instruction, &[RIGHT, RIGHT]);
+
+ // out of bounds tests
+ assert!(generate_traversal_instruction(4, 6).is_err());
+ assert!(generate_traversal_instruction(4, 0).is_err());
+ }
+
+ #[test]
+ fn test_chunk_height() {
+ // tree of height 6:
+ // all chunks have the same height
+ // since layer heights = [2, 2, 2];
+ // we have 21 chunks in a tree of this height
+ for i in 1..=21 {
+ assert_eq!(chunk_height(6, i).unwrap(), 2);
+ }
+
+ // tree of height 5:
+ // layer heights = [2, 2, 2]
+ // we also have 21 chunks here
+ for i in 1..=21 {
+ assert_eq!(chunk_height(5, i).unwrap(), 2);
+ }
+
+ // tree of height 10:
+ // layer heights = [2, 2, 2, 2, 2]
+ // just going to check chunks 1 - 5
+ assert_eq!(chunk_height(10, 1).unwrap(), 2);
+ assert_eq!(chunk_height(10, 2).unwrap(), 2);
+ assert_eq!(chunk_height(10, 3).unwrap(), 2);
+ assert_eq!(chunk_height(10, 4).unwrap(), 2);
+ assert_eq!(chunk_height(10, 5).unwrap(), 2);
+ }
+
+ #[test]
+ fn test_traversal_instruction_as_string() {
+ assert_eq!(traversal_instruction_as_string(vec![]), "");
+ assert_eq!(traversal_instruction_as_string(vec![LEFT]), "1");
+ assert_eq!(traversal_instruction_as_string(vec![RIGHT]), "0");
+ assert_eq!(
+ traversal_instruction_as_string(vec![RIGHT, LEFT, LEFT, RIGHT]),
+ "0110"
+ );
+ }
+}
diff --git a/merk/src/test_utils/mod.rs b/merk/src/test_utils/mod.rs
index b7518158..0611fa7a 100644
--- a/merk/src/test_utils/mod.rs
+++ b/merk/src/test_utils/mod.rs
@@ -190,6 +190,7 @@ pub const fn seq_key(n: u64) -> [u8; 8] {
 /// Create batch entry with Put op using key n and a fixed value
 pub fn put_entry(n: u64) -> BatchEntry<Vec<u8>> {
- (seq_key(n).to_vec(), Op::Put(vec![123; 60], BasicMerk))
+ let key = seq_key(n).to_vec();
+ (key, Op::Put(vec![123; 60], BasicMerk))
 }
@@ -274,7 +275,15 @@ pub fn make_tree_rand(
 /// Create tree with initial fixed values and apply `node count` Put ops using
 /// sequential keys using memory only
+/// the starting tree node key is [0; 20]
 pub fn make_tree_seq(node_count: u64) -> Tree {
+ make_tree_seq_with_start_key(node_count, [0; 20].to_vec())
+}
+
+/// Create tree with initial fixed values and apply `node count` Put ops using
+/// sequential keys using memory only;
+/// takes the starting key as a parameter
+pub fn make_tree_seq_with_start_key(node_count: u64, start_key: Vec<u8>)
-> Tree { let batch_size = if node_count >= 10_000 { assert_eq!(node_count % 10_000, 0); 10_000 @@ -283,7 +292,8 @@ pub fn make_tree_seq(node_count: u64) -> Tree { }; let value = vec![123; 60]; - let mut tree = Tree::new(vec![0; 20], value, None, BasicMerk).unwrap(); + + let mut tree = Tree::new(start_key, value, None, BasicMerk).unwrap(); let batch_count = node_count / batch_size; for i in 0..batch_count { From 03b42c9600b81647012670cf88505d014c823668 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Mon, 2 Oct 2023 08:11:04 +0100 Subject: [PATCH 2/7] Squashed commit of the following: remove bad test rename files update documentation wip wip implement merk verifier + state building implement replication from multichunk fix chunk verification fixed implementation of chunkid from traversal instructions fix some tests make chunk_id from traversal instruction test resistant to changes in underlying chunking scheme add restoration logic test function returning the next chunk id when you call chunk use strings as communication interface between producer and restorer implement chunk id from traversal instruction add traversal instruction generation to direct string chunk producer returns next index as string for multi chunk clean up rewrite parent links restoration done successfully rough implementation of rewrite parent implement function to extract sum from node type wip chunk write logic + restorer finalization + parent key tracking new visit ref function that keeps track of traversal path implement instruction string to traversal instruction test child to link functionality for basic and sum merks implement node to link include sum wip implement and test chunk verification Fix layer iter function Previous implementation made a key assumption that nodes are unique including hash nodes, this made the layer iteration functionality depend on the contents of the tree, which shouldn't be the case. This adds a simpler implementation of the layer iter logic using breadth first search. 
add test to ensure chunks only contain hash and kvfeaturetype test for avl tree during proof op execution remove chunk_height_per_layer_lin_comb every chunk now has fixed height of 2 --- grovedb/src/lib.rs | 4 +- grovedb/src/replication.rs | 1978 ++++++++--------- merk/src/error.rs | 4 + merk/src/lib.rs | 2 +- merk/src/merk/chunks.rs | 1278 ++++++++--- merk/src/merk/chunks2.rs | 943 -------- merk/src/merk/mod.rs | 138 +- merk/src/merk/restore.rs | 1384 +++++++++--- merk/src/merk/restore2.rs | 195 -- merk/src/proofs/chunk.rs | 603 +---- merk/src/proofs/chunk/binary_range.rs | 28 + merk/src/proofs/chunk/{chunk2.rs => chunk.rs} | 30 +- merk/src/proofs/chunk/chunk_op.rs | 30 +- merk/src/proofs/chunk/error.rs | 47 + merk/src/proofs/chunk/util.rs | 417 +++- merk/src/proofs/tree.rs | 258 ++- merk/src/tree/link.rs | 2 +- merk/src/tree/mod.rs | 4 +- 18 files changed, 3795 insertions(+), 3550 deletions(-) delete mode 100644 merk/src/merk/chunks2.rs delete mode 100644 merk/src/merk/restore2.rs rename merk/src/proofs/chunk/{chunk2.rs => chunk.rs} (94%) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 87ca2443..23840455 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -215,8 +215,8 @@ use grovedb_storage::{Storage, StorageContext}; use grovedb_visualize::DebugByteVectors; #[cfg(any(feature = "full", feature = "verify"))] pub use query::{PathQuery, SizedQuery}; -#[cfg(feature = "full")] -pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer}; +// #[cfg(feature = "full")] +// pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer}; #[cfg(any(feature = "full", feature = "verify"))] pub use crate::error::Error; diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index 86c1c3f0..e97f7820 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -1,989 +1,989 @@ -// MIT LICENSE -// -// Copyright (c) 2021 Dash Core Group -// -// Permission is hereby granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// shall be included in all copies or substantial portions -// of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. - -//! 
Replication - -use std::{ - collections::VecDeque, - iter::{empty, once}, -}; - -use grovedb_merk::{ - proofs::{Node, Op}, - Merk, TreeFeatureType, -}; -use grovedb_path::SubtreePath; -use grovedb_storage::{ - rocksdb_storage::{PrefixedRocksDbImmediateStorageContext, PrefixedRocksDbStorageContext}, - Storage, StorageContext, -}; - -use crate::{Element, Error, GroveDb, Hash, Transaction}; - -const OPS_PER_CHUNK: usize = 128; - -impl GroveDb { - /// Creates a chunk producer to replicate GroveDb. - pub fn chunks(&self) -> SubtreeChunkProducer { - SubtreeChunkProducer::new(self) - } -} - -/// Subtree chunks producer. -pub struct SubtreeChunkProducer<'db> { - grove_db: &'db GroveDb, - cache: Option>, -} - -struct SubtreeChunkProducerCache<'db> { - current_merk_path: Vec>, - current_merk: Merk>, - // This needed to be an `Option` because it requires a reference on Merk but it's within the - // same struct and during struct init a referenced Merk would be moved inside a struct, - // using `Option` this init happens in two steps. - current_chunk_producer: - Option>>, -} - -impl<'db> SubtreeChunkProducer<'db> { - fn new(storage: &'db GroveDb) -> Self { - SubtreeChunkProducer { - grove_db: storage, - cache: None, - } - } - - /// Chunks in current producer - pub fn chunks_in_current_producer(&self) -> usize { - self.cache - .as_ref() - .and_then(|c| c.current_chunk_producer.as_ref().map(|p| p.len())) - .unwrap_or(0) - } - - /// Get chunk - pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> - where - P: IntoIterator, -
<P as IntoIterator>
::IntoIter: Clone + DoubleEndedIterator, - { - let path_iter = path.into_iter(); - - if let Some(SubtreeChunkProducerCache { - current_merk_path, .. - }) = &self.cache - { - if !itertools::equal(current_merk_path, path_iter.clone()) { - self.cache = None; - } - } - - if self.cache.is_none() { - let current_merk = self - .grove_db - .open_non_transactional_merk_at_path( - path_iter.clone().collect::>().as_slice().into(), - None, - ) - .unwrap()?; - - if current_merk.root_key().is_none() { - return Ok(Vec::new()); - } - - self.cache = Some(SubtreeChunkProducerCache { - current_merk_path: path_iter.map(|p| p.to_vec()).collect(), - current_merk, - current_chunk_producer: None, - }); - let cache = self.cache.as_mut().expect("exists at this point"); - cache.current_chunk_producer = Some( - grovedb_merk::ChunkProducer::new(&cache.current_merk) - .map_err(|e| Error::CorruptedData(e.to_string()))?, - ); - } - - self.cache - .as_mut() - .expect("must exist at this point") - .current_chunk_producer - .as_mut() - .expect("must exist at this point") - .chunk(index) - .map_err(|e| Error::CorruptedData(e.to_string())) - } -} - -// TODO: make generic over storage_cost context -type MerkRestorer<'db> = grovedb_merk::Restorer>; - -type Path = Vec>; - -/// Structure to drive GroveDb restore process. -pub struct Restorer<'db> { - current_merk_restorer: Option>, - current_merk_chunk_index: usize, - current_merk_path: Path, - queue: VecDeque<(Path, Vec, Hash, TreeFeatureType)>, - grove_db: &'db GroveDb, - tx: &'db Transaction<'db>, -} - -/// Indicates what next piece of information `Restorer` expects or wraps a -/// successful result. -#[derive(Debug)] -pub enum RestorerResponse { - AwaitNextChunk { path: Vec>, index: usize }, - Ready, -} - -#[derive(Debug)] -pub struct RestorerError(String); - -impl<'db> Restorer<'db> { - /// Create a GroveDb restorer using a backing storage_cost and root hash. - pub fn new( - grove_db: &'db GroveDb, - root_hash: Hash, - tx: &'db Transaction<'db>, - ) -> Result { - Ok(Restorer { - tx, - current_merk_restorer: Some(MerkRestorer::new( - Merk::open_base( - grove_db - .db - .get_immediate_storage_context(SubtreePath::empty(), tx) - .unwrap(), - false, - ) - .unwrap() - .map_err(|e| RestorerError(e.to_string()))?, - None, - root_hash, - )), - current_merk_chunk_index: 0, - current_merk_path: vec![], - queue: VecDeque::new(), - grove_db, - }) - } - - /// Process next chunk and receive instruction on what to do next. - pub fn process_chunk( - &mut self, - chunk_ops: impl IntoIterator, - ) -> Result { - if self.current_merk_restorer.is_none() { - // Last restorer was consumed and no more Merks to process. - return Ok(RestorerResponse::Ready); - } - // First we decode a chunk to take out info about nested trees to add them into - // todo list. - let mut ops = Vec::new(); - for op in chunk_ops { - ops.push(op); - match ops.last().expect("just inserted") { - Op::Push(Node::KVValueHashFeatureType( - key, - value_bytes, - value_hash, - feature_type, - )) - | Op::PushInverted(Node::KVValueHashFeatureType( - key, - value_bytes, - value_hash, - feature_type, - )) => { - if let Element::Tree(root_key, _) | Element::SumTree(root_key, ..) = - Element::deserialize(value_bytes) - .map_err(|e| RestorerError(e.to_string()))? - { - if root_key.is_none() || self.current_merk_path.last() == Some(key) { - // We add only subtrees of the current subtree to queue, skipping - // itself; Also skipping empty Merks. 
- continue; - } - let mut path = self.current_merk_path.clone(); - path.push(key.clone()); - // The value hash is the root tree hash - self.queue.push_back(( - path, - value_bytes.to_owned(), - *value_hash, - *feature_type, - )); - } - } - _ => {} - } - } - - // Process chunk using Merk's possibilities. - let remaining = self - .current_merk_restorer - .as_mut() - .expect("restorer exists at this point") - .process_chunk(ops) - .map_err(|e| RestorerError(e.to_string()))?; - - self.current_merk_chunk_index += 1; - - if remaining == 0 { - // If no more chunks for this Merk required decide if we're done or take a next - // Merk to process. - self.current_merk_restorer - .take() - .expect("restorer exists at this point") - .finalize() - .map_err(|e| RestorerError(e.to_string()))?; - if let Some((next_path, combining_value, expected_hash, _)) = self.queue.pop_front() { - // Process next subtree. - let merk = self - .grove_db - .open_merk_for_replication(next_path.as_slice().into(), self.tx) - .map_err(|e| RestorerError(e.to_string()))?; - self.current_merk_restorer = Some(MerkRestorer::new( - merk, - Some(combining_value), - expected_hash, - )); - self.current_merk_chunk_index = 0; - self.current_merk_path = next_path; - - Ok(RestorerResponse::AwaitNextChunk { - path: self.current_merk_path.clone(), - index: self.current_merk_chunk_index, - }) - } else { - Ok(RestorerResponse::Ready) - } - } else { - // Request a chunk at the same path but with incremented index. - Ok(RestorerResponse::AwaitNextChunk { - path: self.current_merk_path.clone(), - index: self.current_merk_chunk_index, - }) - } - } -} - -/// Chunk producer wrapper which uses bigger messages that may include chunks of -/// requested subtree with its right siblings. -/// -/// Because `Restorer` builds GroveDb replica breadth-first way from top to -/// bottom it makes sense to send a subtree's siblings next instead of its own -/// subtrees. -pub struct SiblingsChunkProducer<'db> { - chunk_producer: SubtreeChunkProducer<'db>, -} - -#[derive(Debug)] -pub struct GroveChunk { - subtree_chunks: Vec<(usize, Vec)>, -} - -impl<'db> SiblingsChunkProducer<'db> { - /// New - pub fn new(chunk_producer: SubtreeChunkProducer<'db>) -> Self { - SiblingsChunkProducer { chunk_producer } - } - - /// Get a collection of chunks possibly from different Merks with the first - /// one as requested. - pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> - where - P: IntoIterator, -
<P as IntoIterator>
::IntoIter: Clone + DoubleEndedIterator + ExactSizeIterator, - { - let path_iter = path.into_iter(); - let mut result = Vec::new(); - let mut ops_count = 0; - - if path_iter.len() == 0 { - // We're at the root of GroveDb, no siblings here. - self.process_subtree_chunks(&mut result, &mut ops_count, empty(), index)?; - return Ok(result); - }; - - // Get siblings on the right to send chunks of multiple Merks if it meets the - // limit. - - let mut siblings_keys: VecDeque> = VecDeque::new(); - - let mut parent_path = path_iter; - let requested_key = parent_path.next_back(); - - let parent_ctx = self - .chunk_producer - .grove_db - .db - .get_storage_context( - parent_path.clone().collect::>().as_slice().into(), - None, - ) - .unwrap(); - let mut siblings_iter = Element::iterator(parent_ctx.raw_iter()).unwrap(); - - if let Some(key) = requested_key { - siblings_iter.fast_forward(key)?; - } - - while let Some(element) = siblings_iter.next_element().unwrap()? { - if let (key, Element::Tree(..)) | (key, Element::SumTree(..)) = element { - siblings_keys.push_back(key); - } - } - - let mut current_index = index; - // Process each subtree - while let Some(subtree_key) = siblings_keys.pop_front() { - #[allow(clippy::map_identity)] - let subtree_path = parent_path - .clone() - .map(|x| x) - .chain(once(subtree_key.as_slice())); - - self.process_subtree_chunks(&mut result, &mut ops_count, subtree_path, current_index)?; - // Going to a next sibling, should start from 0. - - if ops_count >= OPS_PER_CHUNK { - break; - } - current_index = 0; - } - - Ok(result) - } - - /// Process one subtree's chunks - fn process_subtree_chunks<'p, P>( - &mut self, - result: &mut Vec, - ops_count: &mut usize, - subtree_path: P, - from_index: usize, - ) -> Result<(), Error> - where - P: IntoIterator, -
<P as IntoIterator>
::IntoIter: Clone + DoubleEndedIterator, - { - let path_iter = subtree_path.into_iter(); - - let mut current_index = from_index; - let mut subtree_chunks = Vec::new(); - - loop { - let ops = self - .chunk_producer - .get_chunk(path_iter.clone(), current_index)?; - - *ops_count += ops.len(); - subtree_chunks.push((current_index, ops)); - current_index += 1; - if current_index >= self.chunk_producer.chunks_in_current_producer() - || *ops_count >= OPS_PER_CHUNK - { - break; - } - } - - result.push(GroveChunk { subtree_chunks }); - - Ok(()) - } -} - -/// `Restorer` wrapper that applies multiple chunks at once and eventually -/// returns less requests. It is named by analogy with IO types that do less -/// syscalls. -pub struct BufferedRestorer<'db> { - restorer: Restorer<'db>, -} - -impl<'db> BufferedRestorer<'db> { - /// New - pub fn new(restorer: Restorer<'db>) -> Self { - BufferedRestorer { restorer } - } - - /// Process next chunk and receive instruction on what to do next. - pub fn process_grove_chunks(&mut self, chunks: I) -> Result - where - I: IntoIterator + ExactSizeIterator, - { - let mut response = RestorerResponse::Ready; - - for c in chunks.into_iter() { - for ops in c.subtree_chunks.into_iter().map(|x| x.1) { - if !ops.is_empty() { - response = self.restorer.process_chunk(ops)?; - } - } - } - - Ok(response) - } -} - -#[cfg(test)] -mod test { - use rand::RngCore; - use tempfile::TempDir; - - use super::*; - use crate::{ - batch::GroveDbOp, - reference_path::ReferencePathType, - tests::{common::EMPTY_PATH, make_test_grovedb, TempGroveDb, ANOTHER_TEST_LEAF, TEST_LEAF}, - }; - - fn replicate(original_db: &GroveDb) -> TempDir { - let replica_tempdir = TempDir::new().unwrap(); - - { - let replica_db = GroveDb::open(replica_tempdir.path()).unwrap(); - let mut chunk_producer = original_db.chunks(); - let tx = replica_db.start_transaction(); - - let mut restorer = Restorer::new( - &replica_db, - original_db.root_hash(None).unwrap().unwrap(), - &tx, - ) - .expect("cannot create restorer"); - - // That means root tree chunk with index 0 - let mut next_chunk: (Vec>, usize) = (vec![], 0); - - loop { - let chunk = chunk_producer - .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) - .expect("cannot get next chunk"); - match restorer.process_chunk(chunk).expect("cannot process chunk") { - RestorerResponse::Ready => break, - RestorerResponse::AwaitNextChunk { path, index } => { - next_chunk = (path, index); - } - } - } - - replica_db.commit_transaction(tx).unwrap().unwrap(); - } - replica_tempdir - } - - fn replicate_bigger_messages(original_db: &GroveDb) -> TempDir { - let replica_tempdir = TempDir::new().unwrap(); - - { - let replica_grove_db = GroveDb::open(replica_tempdir.path()).unwrap(); - let mut chunk_producer = SiblingsChunkProducer::new(original_db.chunks()); - let tx = replica_grove_db.start_transaction(); - - let mut restorer = BufferedRestorer::new( - Restorer::new( - &replica_grove_db, - original_db.root_hash(None).unwrap().unwrap(), - &tx, - ) - .expect("cannot create restorer"), - ); - - // That means root tree chunk with index 0 - let mut next_chunk: (Vec>, usize) = (vec![], 0); - - loop { - let chunks = chunk_producer - .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) - .expect("cannot get next chunk"); - match restorer - .process_grove_chunks(chunks.into_iter()) - .expect("cannot process chunk") - { - RestorerResponse::Ready => break, - RestorerResponse::AwaitNextChunk { path, index } => { - next_chunk = (path, index); - } - } - } - - 
replica_grove_db.commit_transaction(tx).unwrap().unwrap(); - } - - replica_tempdir - } - - fn test_replication_internal<'a, I, R, F>( - original_db: &TempGroveDb, - to_compare: I, - replicate_fn: F, - ) where - R: AsRef<[u8]> + 'a, - I: Iterator, - F: Fn(&GroveDb) -> TempDir, - { - let expected_root_hash = original_db.root_hash(None).unwrap().unwrap(); - - let replica_tempdir = replicate_fn(original_db); - - let replica = GroveDb::open(replica_tempdir.path()).unwrap(); - assert_eq!( - replica.root_hash(None).unwrap().unwrap(), - expected_root_hash - ); - - for full_path in to_compare { - let (key, path) = full_path.split_last().unwrap(); - assert_eq!( - original_db.get(path, key.as_ref(), None).unwrap().unwrap(), - replica.get(path, key.as_ref(), None).unwrap().unwrap() - ); - } - } - - fn test_replication<'a, I, R>(original_db: &TempGroveDb, to_compare: I) - where - R: AsRef<[u8]> + 'a, - I: Iterator + Clone, - { - test_replication_internal(original_db, to_compare.clone(), replicate); - test_replication_internal(original_db, to_compare, replicate_bigger_messages); - } - - #[test] - fn replicate_wrong_root_hash() { - let db = make_test_grovedb(); - let mut bad_hash = db.root_hash(None).unwrap().unwrap(); - bad_hash[0] = bad_hash[0].wrapping_add(1); - - let tmp_dir = TempDir::new().unwrap(); - let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); - let tx = restored_db.start_transaction(); - let mut restorer = Restorer::new(&restored_db, bad_hash, &tx).unwrap(); - let mut chunks = db.chunks(); - assert!(restorer - .process_chunk(chunks.get_chunk([], 0).unwrap()) - .is_err()); - } - - #[test] - fn replicate_provide_wrong_tree() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key1", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let expected_hash = db.root_hash(None).unwrap().unwrap(); - - let tmp_dir = TempDir::new().unwrap(); - let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); - let tx = restored_db.start_transaction(); - let mut restorer = Restorer::new(&restored_db, expected_hash, &tx).unwrap(); - let mut chunks = db.chunks(); - - let next_op = restorer - .process_chunk(chunks.get_chunk([], 0).unwrap()) - .unwrap(); - match next_op { - RestorerResponse::AwaitNextChunk { path, index } => { - // Feed restorer a wrong Merk! 
- let chunk = if path == [TEST_LEAF] { - chunks.get_chunk([ANOTHER_TEST_LEAF], index).unwrap() - } else { - chunks.get_chunk([TEST_LEAF], index).unwrap() - }; - assert!(restorer.process_chunk(chunk).is_err()); - } - _ => {} - } - } - - #[test] - fn replicate_nested_grovedb() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF], - b"key2", - Element::new_reference(ReferencePathType::SiblingReference(b"key1".to_vec())), - None, - None, - ) - .unwrap() - .expect("should insert reference"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"key3", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2", b"key3"], - b"key4", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF].as_ref(), - [ANOTHER_TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); - } - - #[test] - fn replicate_nested_grovedb_with_sum_trees() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF], - b"key2", - Element::new_reference(ReferencePathType::SiblingReference(b"key1".to_vec())), - None, - None, - ) - .unwrap() - .expect("should insert reference"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::empty_sum_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"sumitem", - Element::new_sum_item(15), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"key3", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2", b"key3"], - b"key4", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF].as_ref(), - [ANOTHER_TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"sumitem"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); - } - - // TODO: Highlights a bug in replication - #[test] - fn replicate_grovedb_with_sum_tree() { - let db = make_test_grovedb(); - db.insert(&[TEST_LEAF], b"key1", Element::empty_tree(), None, None) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF, b"key1"], - b"key2", - Element::new_item(vec![4]), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF, b"key1"], - b"key3", - Element::new_item(vec![10]), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - 
[ANOTHER_TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key1", b"key2"].as_ref(), - [TEST_LEAF, b"key1", b"key3"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); - } - - #[test] - fn replicate_a_big_one() { - const HEIGHT: usize = 3; - const SUBTREES_FOR_EACH: usize = 3; - const SCALARS_FOR_EACH: usize = 600; - - let db = make_test_grovedb(); - let mut to_compare = Vec::new(); - - let mut rng = rand::thread_rng(); - let mut subtrees: VecDeque> = VecDeque::new(); - - // Generate root tree leafs - for _ in 0..SUBTREES_FOR_EACH { - let mut bytes = [0; 8]; - rng.fill_bytes(&mut bytes); - db.insert(EMPTY_PATH, &bytes, Element::empty_tree(), None, None) - .unwrap() - .unwrap(); - subtrees.push_front(vec![bytes]); - to_compare.push(vec![bytes]); - } - - while let Some(path) = subtrees.pop_front() { - let mut batch = Vec::new(); - - if path.len() < HEIGHT { - for _ in 0..SUBTREES_FOR_EACH { - let mut bytes = [0; 8]; - rng.fill_bytes(&mut bytes); - - batch.push(GroveDbOp::insert_op( - path.iter().map(|x| x.to_vec()).collect(), - bytes.to_vec(), - Element::empty_tree(), - )); - - let mut new_path = path.clone(); - new_path.push(bytes); - subtrees.push_front(new_path.clone()); - to_compare.push(new_path.clone()); - } - } - - for _ in 0..SCALARS_FOR_EACH { - let mut bytes = [0; 8]; - let mut bytes_val = vec![]; - rng.fill_bytes(&mut bytes); - rng.fill_bytes(&mut bytes_val); - - batch.push(GroveDbOp::insert_op( - path.iter().map(|x| x.to_vec()).collect(), - bytes.to_vec(), - Element::new_item(bytes_val), - )); - - let mut new_path = path.clone(); - new_path.push(bytes); - to_compare.push(new_path.clone()); - } - - db.apply_batch(batch, None, None).unwrap().unwrap(); - } - - test_replication(&db, to_compare.iter().map(|x| x.as_slice())); - } - - #[test] - fn replicate_from_checkpoint() { - // Create a simple GroveDb first - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - - // Save its state with checkpoint - let checkpoint_dir_parent = TempDir::new().unwrap(); - let checkpoint_dir = checkpoint_dir_parent.path().join("cp"); - db.create_checkpoint(&checkpoint_dir).unwrap(); - - // Alter the db to make difference between current state and checkpoint - db.delete(&[TEST_LEAF], b"key1", None, None) - .unwrap() - .unwrap(); - db.insert( - &[TEST_LEAF], - b"key3", - Element::new_item(b"ayyd".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::new_item(b"ayyc".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - - let checkpoint_db = GroveDb::open(&checkpoint_dir).unwrap(); - - // Ensure checkpoint differs from current state - assert_ne!( - checkpoint_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - db.get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - ); - - // Build a replica from checkpoint - let replica_dir = replicate(&checkpoint_db); - let replica_db = GroveDb::open(&replica_dir).unwrap(); - - assert_eq!( - checkpoint_db.root_hash(None).unwrap().unwrap(), - replica_db.root_hash(None).unwrap().unwrap() - ); - - assert_eq!( - checkpoint_db - .get(&[TEST_LEAF], b"key1", None) - .unwrap() - .unwrap(), - replica_db - .get(&[TEST_LEAF], b"key1", None) - .unwrap() - .unwrap(), - ); - assert_eq!( - checkpoint_db - 
.get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - replica_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - ); - assert!(matches!( - replica_db.get(&[TEST_LEAF], b"key3", None).unwrap(), - Err(Error::PathKeyNotFound(_)) - )); - - // Drop original db and checkpoint dir too to ensure there is no dependency - drop(db); - drop(checkpoint_db); - drop(checkpoint_dir); - - assert_eq!( - replica_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - Element::new_item(b"ayyb".to_vec()) - ); - } -} +// // MIT LICENSE +// // +// // Copyright (c) 2021 Dash Core Group +// // +// // Permission is hereby granted, free of charge, to any +// // person obtaining a copy of this software and associated +// // documentation files (the "Software"), to deal in the +// // Software without restriction, including without +// // limitation the rights to use, copy, modify, merge, +// // publish, distribute, sublicense, and/or sell copies of +// // the Software, and to permit persons to whom the Software +// // is furnished to do so, subject to the following +// // conditions: +// // +// // The above copyright notice and this permission notice +// // shall be included in all copies or substantial portions +// // of the Software. +// // +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// // DEALINGS IN THE SOFTWARE. +// +// //! Replication +// +// use std::{ +// collections::VecDeque, +// iter::{empty, once}, +// }; +// +// use grovedb_merk::{ +// proofs::{Node, Op}, +// Merk, TreeFeatureType, +// }; +// use grovedb_path::SubtreePath; +// use grovedb_storage::{ +// rocksdb_storage::{PrefixedRocksDbImmediateStorageContext, PrefixedRocksDbStorageContext}, +// Storage, StorageContext, +// }; +// +// use crate::{Element, Error, GroveDb, Hash, Transaction}; +// +// const OPS_PER_CHUNK: usize = 128; +// +// impl GroveDb { +// /// Creates a chunk producer to replicate GroveDb. +// pub fn chunks(&self) -> SubtreeChunkProducer { +// SubtreeChunkProducer::new(self) +// } +// } +// +// /// Subtree chunks producer. +// pub struct SubtreeChunkProducer<'db> { +// grove_db: &'db GroveDb, +// cache: Option>, +// } +// +// struct SubtreeChunkProducerCache<'db> { +// current_merk_path: Vec>, +// current_merk: Merk>, +// // This needed to be an `Option` because it requires a reference on Merk but it's within the +// // same struct and during struct init a referenced Merk would be moved inside a struct, +// // using `Option` this init happens in two steps. +// current_chunk_producer: +// Option>>, +// } +// +// impl<'db> SubtreeChunkProducer<'db> { +// fn new(storage: &'db GroveDb) -> Self { +// SubtreeChunkProducer { +// grove_db: storage, +// cache: None, +// } +// } +// +// /// Chunks in current producer +// pub fn chunks_in_current_producer(&self) -> usize { +// self.cache +// .as_ref() +// .and_then(|c| c.current_chunk_producer.as_ref().map(|p| p.len())) +// .unwrap_or(0) +// } +// +// /// Get chunk +// pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> +// where +// P: IntoIterator, +//

::IntoIter: Clone + DoubleEndedIterator, +// { +// let path_iter = path.into_iter(); +// +// if let Some(SubtreeChunkProducerCache { +// current_merk_path, .. +// }) = &self.cache +// { +// if !itertools::equal(current_merk_path, path_iter.clone()) { +// self.cache = None; +// } +// } +// +// if self.cache.is_none() { +// let current_merk = self +// .grove_db +// .open_non_transactional_merk_at_path( +// path_iter.clone().collect::>().as_slice().into(), +// None, +// ) +// .unwrap()?; +// +// if current_merk.root_key().is_none() { +// return Ok(Vec::new()); +// } +// +// self.cache = Some(SubtreeChunkProducerCache { +// current_merk_path: path_iter.map(|p| p.to_vec()).collect(), +// current_merk, +// current_chunk_producer: None, +// }); +// let cache = self.cache.as_mut().expect("exists at this point"); +// cache.current_chunk_producer = Some( +// grovedb_merk::ChunkProducer::new(&cache.current_merk) +// .map_err(|e| Error::CorruptedData(e.to_string()))?, +// ); +// } +// +// self.cache +// .as_mut() +// .expect("must exist at this point") +// .current_chunk_producer +// .as_mut() +// .expect("must exist at this point") +// .chunk(index) +// .map_err(|e| Error::CorruptedData(e.to_string())) +// } +// } +// +// // TODO: make generic over storage_cost context +// type MerkRestorer<'db> = grovedb_merk::Restorer>; +// +// type Path = Vec>; +// +// /// Structure to drive GroveDb restore process. +// pub struct Restorer<'db> { +// current_merk_restorer: Option>, +// current_merk_chunk_index: usize, +// current_merk_path: Path, +// queue: VecDeque<(Path, Vec, Hash, TreeFeatureType)>, +// grove_db: &'db GroveDb, +// tx: &'db Transaction<'db>, +// } +// +// /// Indicates what next piece of information `Restorer` expects or wraps a +// /// successful result. +// #[derive(Debug)] +// pub enum RestorerResponse { +// AwaitNextChunk { path: Vec>, index: usize }, +// Ready, +// } +// +// #[derive(Debug)] +// pub struct RestorerError(String); +// +// impl<'db> Restorer<'db> { +// /// Create a GroveDb restorer using a backing storage_cost and root hash. +// pub fn new( +// grove_db: &'db GroveDb, +// root_hash: Hash, +// tx: &'db Transaction<'db>, +// ) -> Result { +// Ok(Restorer { +// tx, +// current_merk_restorer: Some(MerkRestorer::new( +// Merk::open_base( +// grove_db +// .db +// .get_immediate_storage_context(SubtreePath::empty(), tx) +// .unwrap(), +// false, +// ) +// .unwrap() +// .map_err(|e| RestorerError(e.to_string()))?, +// None, +// root_hash, +// )), +// current_merk_chunk_index: 0, +// current_merk_path: vec![], +// queue: VecDeque::new(), +// grove_db, +// }) +// } +// +// /// Process next chunk and receive instruction on what to do next. +// pub fn process_chunk( +// &mut self, +// chunk_ops: impl IntoIterator, +// ) -> Result { +// if self.current_merk_restorer.is_none() { +// // Last restorer was consumed and no more Merks to process. +// return Ok(RestorerResponse::Ready); +// } +// // First we decode a chunk to take out info about nested trees to add them into +// // todo list. +// let mut ops = Vec::new(); +// for op in chunk_ops { +// ops.push(op); +// match ops.last().expect("just inserted") { +// Op::Push(Node::KVValueHashFeatureType( +// key, +// value_bytes, +// value_hash, +// feature_type, +// )) +// | Op::PushInverted(Node::KVValueHashFeatureType( +// key, +// value_bytes, +// value_hash, +// feature_type, +// )) => { +// if let Element::Tree(root_key, _) | Element::SumTree(root_key, ..) 
= +// Element::deserialize(value_bytes) +// .map_err(|e| RestorerError(e.to_string()))? +// { +// if root_key.is_none() || self.current_merk_path.last() == Some(key) { +// // We add only subtrees of the current subtree to queue, skipping +// // itself; Also skipping empty Merks. +// continue; +// } +// let mut path = self.current_merk_path.clone(); +// path.push(key.clone()); +// // The value hash is the root tree hash +// self.queue.push_back(( +// path, +// value_bytes.to_owned(), +// *value_hash, +// *feature_type, +// )); +// } +// } +// _ => {} +// } +// } +// +// // Process chunk using Merk's possibilities. +// let remaining = self +// .current_merk_restorer +// .as_mut() +// .expect("restorer exists at this point") +// .process_chunk(ops) +// .map_err(|e| RestorerError(e.to_string()))?; +// +// self.current_merk_chunk_index += 1; +// +// if remaining == 0 { +// // If no more chunks for this Merk required decide if we're done or take a next +// // Merk to process. +// self.current_merk_restorer +// .take() +// .expect("restorer exists at this point") +// .finalize() +// .map_err(|e| RestorerError(e.to_string()))?; +// if let Some((next_path, combining_value, expected_hash, _)) = self.queue.pop_front() { +// // Process next subtree. +// let merk = self +// .grove_db +// .open_merk_for_replication(next_path.as_slice().into(), self.tx) +// .map_err(|e| RestorerError(e.to_string()))?; +// self.current_merk_restorer = Some(MerkRestorer::new( +// merk, +// Some(combining_value), +// expected_hash, +// )); +// self.current_merk_chunk_index = 0; +// self.current_merk_path = next_path; +// +// Ok(RestorerResponse::AwaitNextChunk { +// path: self.current_merk_path.clone(), +// index: self.current_merk_chunk_index, +// }) +// } else { +// Ok(RestorerResponse::Ready) +// } +// } else { +// // Request a chunk at the same path but with incremented index. +// Ok(RestorerResponse::AwaitNextChunk { +// path: self.current_merk_path.clone(), +// index: self.current_merk_chunk_index, +// }) +// } +// } +// } +// +// /// Chunk producer wrapper which uses bigger messages that may include chunks of +// /// requested subtree with its right siblings. +// /// +// /// Because `Restorer` builds GroveDb replica breadth-first way from top to +// /// bottom it makes sense to send a subtree's siblings next instead of its own +// /// subtrees. +// pub struct SiblingsChunkProducer<'db> { +// chunk_producer: SubtreeChunkProducer<'db>, +// } +// +// #[derive(Debug)] +// pub struct GroveChunk { +// subtree_chunks: Vec<(usize, Vec)>, +// } +// +// impl<'db> SiblingsChunkProducer<'db> { +// /// New +// pub fn new(chunk_producer: SubtreeChunkProducer<'db>) -> Self { +// SiblingsChunkProducer { chunk_producer } +// } +// +// /// Get a collection of chunks possibly from different Merks with the first +// /// one as requested. +// pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> +// where +// P: IntoIterator, +//

::IntoIter: Clone + DoubleEndedIterator + ExactSizeIterator, +// { +// let path_iter = path.into_iter(); +// let mut result = Vec::new(); +// let mut ops_count = 0; +// +// if path_iter.len() == 0 { +// // We're at the root of GroveDb, no siblings here. +// self.process_subtree_chunks(&mut result, &mut ops_count, empty(), index)?; +// return Ok(result); +// }; +// +// // Get siblings on the right to send chunks of multiple Merks if it meets the +// // limit. +// +// let mut siblings_keys: VecDeque> = VecDeque::new(); +// +// let mut parent_path = path_iter; +// let requested_key = parent_path.next_back(); +// +// let parent_ctx = self +// .chunk_producer +// .grove_db +// .db +// .get_storage_context( +// parent_path.clone().collect::>().as_slice().into(), +// None, +// ) +// .unwrap(); +// let mut siblings_iter = Element::iterator(parent_ctx.raw_iter()).unwrap(); +// +// if let Some(key) = requested_key { +// siblings_iter.fast_forward(key)?; +// } +// +// while let Some(element) = siblings_iter.next_element().unwrap()? { +// if let (key, Element::Tree(..)) | (key, Element::SumTree(..)) = element { +// siblings_keys.push_back(key); +// } +// } +// +// let mut current_index = index; +// // Process each subtree +// while let Some(subtree_key) = siblings_keys.pop_front() { +// #[allow(clippy::map_identity)] +// let subtree_path = parent_path +// .clone() +// .map(|x| x) +// .chain(once(subtree_key.as_slice())); +// +// self.process_subtree_chunks(&mut result, &mut ops_count, subtree_path, current_index)?; +// // Going to a next sibling, should start from 0. +// +// if ops_count >= OPS_PER_CHUNK { +// break; +// } +// current_index = 0; +// } +// +// Ok(result) +// } +// +// /// Process one subtree's chunks +// fn process_subtree_chunks<'p, P>( +// &mut self, +// result: &mut Vec, +// ops_count: &mut usize, +// subtree_path: P, +// from_index: usize, +// ) -> Result<(), Error> +// where +// P: IntoIterator, +//

::IntoIter: Clone + DoubleEndedIterator, +// { +// let path_iter = subtree_path.into_iter(); +// +// let mut current_index = from_index; +// let mut subtree_chunks = Vec::new(); +// +// loop { +// let ops = self +// .chunk_producer +// .get_chunk(path_iter.clone(), current_index)?; +// +// *ops_count += ops.len(); +// subtree_chunks.push((current_index, ops)); +// current_index += 1; +// if current_index >= self.chunk_producer.chunks_in_current_producer() +// || *ops_count >= OPS_PER_CHUNK +// { +// break; +// } +// } +// +// result.push(GroveChunk { subtree_chunks }); +// +// Ok(()) +// } +// } +// +// /// `Restorer` wrapper that applies multiple chunks at once and eventually +// /// returns less requests. It is named by analogy with IO types that do less +// /// syscalls. +// pub struct BufferedRestorer<'db> { +// restorer: Restorer<'db>, +// } +// +// impl<'db> BufferedRestorer<'db> { +// /// New +// pub fn new(restorer: Restorer<'db>) -> Self { +// BufferedRestorer { restorer } +// } +// +// /// Process next chunk and receive instruction on what to do next. +// pub fn process_grove_chunks(&mut self, chunks: I) -> Result +// where +// I: IntoIterator + ExactSizeIterator, +// { +// let mut response = RestorerResponse::Ready; +// +// for c in chunks.into_iter() { +// for ops in c.subtree_chunks.into_iter().map(|x| x.1) { +// if !ops.is_empty() { +// response = self.restorer.process_chunk(ops)?; +// } +// } +// } +// +// Ok(response) +// } +// } +// +// // #[cfg(test)] +// // mod test { +// // use rand::RngCore; +// // use tempfile::TempDir; +// // +// // use super::*; +// // use crate::{ +// // batch::GroveDbOp, +// // reference_path::ReferencePathType, +// // tests::{common::EMPTY_PATH, make_test_grovedb, TempGroveDb, +// // ANOTHER_TEST_LEAF, TEST_LEAF}, }; +// // +// // fn replicate(original_db: &GroveDb) -> TempDir { +// // let replica_tempdir = TempDir::new().unwrap(); +// // +// // { +// // let replica_db = GroveDb::open(replica_tempdir.path()).unwrap(); +// // let mut chunk_producer = original_db.chunks(); +// // let tx = replica_db.start_transaction(); +// // +// // let mut restorer = Restorer::new( +// // &replica_db, +// // original_db.root_hash(None).unwrap().unwrap(), +// // &tx, +// // ) +// // .expect("cannot create restorer"); +// // +// // That means root tree chunk with index 0 +// // let mut next_chunk: (Vec>, usize) = (vec![], 0); +// // +// // loop { +// // let chunk = chunk_producer +// // .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) +// // .expect("cannot get next chunk"); +// // match restorer.process_chunk(chunk).expect("cannot process chunk") { +// // RestorerResponse::Ready => break, +// // RestorerResponse::AwaitNextChunk { path, index } => { +// // next_chunk = (path, index); +// // } +// // } +// // } +// // +// // replica_db.commit_transaction(tx).unwrap().unwrap(); +// // } +// // replica_tempdir +// // } +// // +// // fn replicate_bigger_messages(original_db: &GroveDb) -> TempDir { +// // let replica_tempdir = TempDir::new().unwrap(); +// // +// // { +// // let replica_grove_db = GroveDb::open(replica_tempdir.path()).unwrap(); +// // let mut chunk_producer = SiblingsChunkProducer::new(original_db.chunks()); +// // let tx = replica_grove_db.start_transaction(); +// // +// // let mut restorer = BufferedRestorer::new( +// // Restorer::new( +// // &replica_grove_db, +// // original_db.root_hash(None).unwrap().unwrap(), +// // &tx, +// // ) +// // .expect("cannot create restorer"), +// // ); +// // +// // That means root tree chunk with 
index 0 +// // let mut next_chunk: (Vec>, usize) = (vec![], 0); +// // +// // loop { +// // let chunks = chunk_producer +// // .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) +// // .expect("cannot get next chunk"); +// // match restorer +// // .process_grove_chunks(chunks.into_iter()) +// // .expect("cannot process chunk") +// // { +// // RestorerResponse::Ready => break, +// // RestorerResponse::AwaitNextChunk { path, index } => { +// // next_chunk = (path, index); +// // } +// // } +// // } +// // +// // replica_grove_db.commit_transaction(tx).unwrap().unwrap(); +// // } +// // +// // replica_tempdir +// // } +// // +// // fn test_replication_internal<'a, I, R, F>( +// // original_db: &TempGroveDb, +// // to_compare: I, +// // replicate_fn: F, +// // ) where +// // R: AsRef<[u8]> + 'a, +// // I: Iterator, +// // F: Fn(&GroveDb) -> TempDir, +// // { +// // let expected_root_hash = original_db.root_hash(None).unwrap().unwrap(); +// // +// // let replica_tempdir = replicate_fn(original_db); +// // +// // let replica = GroveDb::open(replica_tempdir.path()).unwrap(); +// // assert_eq!( +// // replica.root_hash(None).unwrap().unwrap(), +// // expected_root_hash +// // ); +// // +// // for full_path in to_compare { +// // let (key, path) = full_path.split_last().unwrap(); +// // assert_eq!( +// // original_db.get(path, key.as_ref(), None).unwrap().unwrap(), +// // replica.get(path, key.as_ref(), None).unwrap().unwrap() +// // ); +// // } +// // } +// // +// // fn test_replication<'a, I, R>(original_db: &TempGroveDb, to_compare: I) +// // where +// // R: AsRef<[u8]> + 'a, +// // I: Iterator + Clone, +// // { +// // test_replication_internal(original_db, to_compare.clone(), replicate); +// // test_replication_internal(original_db, to_compare, +// // replicate_bigger_messages); } +// // +// // #[test] +// // fn replicate_wrong_root_hash() { +// // let db = make_test_grovedb(); +// // let mut bad_hash = db.root_hash(None).unwrap().unwrap(); +// // bad_hash[0] = bad_hash[0].wrapping_add(1); +// // +// // let tmp_dir = TempDir::new().unwrap(); +// // let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); +// // let tx = restored_db.start_transaction(); +// // let mut restorer = Restorer::new(&restored_db, bad_hash, &tx).unwrap(); +// // let mut chunks = db.chunks(); +// // assert!(restorer +// // .process_chunk(chunks.get_chunk([], 0).unwrap()) +// // .is_err()); +// // } +// // +// // #[test] +// // fn replicate_provide_wrong_tree() { +// // let db = make_test_grovedb(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayya".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayyb".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // +// // let expected_hash = db.root_hash(None).unwrap().unwrap(); +// // +// // let tmp_dir = TempDir::new().unwrap(); +// // let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); +// // let tx = restored_db.start_transaction(); +// // let mut restorer = Restorer::new(&restored_db, expected_hash, &tx).unwrap(); +// // let mut chunks = db.chunks(); +// // +// // let next_op = restorer +// // .process_chunk(chunks.get_chunk([], 0).unwrap()) +// // .unwrap(); +// // match next_op { +// // RestorerResponse::AwaitNextChunk { path, index } => { +// // Feed restorer a wrong Merk! 
+// // let chunk = if path == [TEST_LEAF] { +// // chunks.get_chunk([ANOTHER_TEST_LEAF], index).unwrap() +// // } else { +// // chunks.get_chunk([TEST_LEAF], index).unwrap() +// // }; +// // assert!(restorer.process_chunk(chunk).is_err()); +// // } +// // _ => {} +// // } +// // } +// // +// // #[test] +// // fn replicate_nested_grovedb() { +// // let db = make_test_grovedb(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayya".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[TEST_LEAF], +// // b"key2", +// // Element::new_reference(ReferencePathType::SiblingReference(b"key1". +// // to_vec())), None, +// // None, +// // ) +// // .unwrap() +// // .expect("should insert reference"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key2", +// // Element::empty_tree(), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2"], +// // b"key3", +// // Element::empty_tree(), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2", b"key3"], +// // b"key4", +// // Element::new_item(b"ayyb".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // +// // let to_compare = [ +// // [TEST_LEAF].as_ref(), +// // [TEST_LEAF, b"key1"].as_ref(), +// // [TEST_LEAF, b"key2"].as_ref(), +// // [ANOTHER_TEST_LEAF].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), +// // ]; +// // test_replication(&db, to_compare.into_iter()); +// // } +// // +// // #[test] +// // fn replicate_nested_grovedb_with_sum_trees() { +// // let db = make_test_grovedb(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayya".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[TEST_LEAF], +// // b"key2", +// // Element::new_reference(ReferencePathType::SiblingReference(b"key1". 
+// // to_vec())), None, +// // None, +// // ) +// // .unwrap() +// // .expect("should insert reference"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key2", +// // Element::empty_sum_tree(), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2"], +// // b"sumitem", +// // Element::new_sum_item(15), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2"], +// // b"key3", +// // Element::empty_tree(), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2", b"key3"], +// // b"key4", +// // Element::new_item(b"ayyb".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // +// // let to_compare = [ +// // [TEST_LEAF].as_ref(), +// // [TEST_LEAF, b"key1"].as_ref(), +// // [TEST_LEAF, b"key2"].as_ref(), +// // [ANOTHER_TEST_LEAF].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"sumitem"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), +// // ]; +// // test_replication(&db, to_compare.into_iter()); +// // } +// // +// // TODO: Highlights a bug in replication +// // #[test] +// // fn replicate_grovedb_with_sum_tree() { +// // let db = make_test_grovedb(); +// // db.insert(&[TEST_LEAF], b"key1", Element::empty_tree(), None, None) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[TEST_LEAF, b"key1"], +// // b"key2", +// // Element::new_item(vec![4]), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[TEST_LEAF, b"key1"], +// // b"key3", +// // Element::new_item(vec![10]), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // +// // let to_compare = [ +// // [TEST_LEAF].as_ref(), +// // [ANOTHER_TEST_LEAF].as_ref(), +// // [TEST_LEAF, b"key1"].as_ref(), +// // [TEST_LEAF, b"key1", b"key2"].as_ref(), +// // [TEST_LEAF, b"key1", b"key3"].as_ref(), +// // ]; +// // test_replication(&db, to_compare.into_iter()); +// // } +// // +// // #[test] +// // fn replicate_a_big_one() { +// // const HEIGHT: usize = 3; +// // const SUBTREES_FOR_EACH: usize = 3; +// // const SCALARS_FOR_EACH: usize = 600; +// // +// // let db = make_test_grovedb(); +// // let mut to_compare = Vec::new(); +// // +// // let mut rng = rand::thread_rng(); +// // let mut subtrees: VecDeque> = VecDeque::new(); +// // +// // Generate root tree leafs +// // for _ in 0..SUBTREES_FOR_EACH { +// // let mut bytes = [0; 8]; +// // rng.fill_bytes(&mut bytes); +// // db.insert(EMPTY_PATH, &bytes, Element::empty_tree(), None, None) +// // .unwrap() +// // .unwrap(); +// // subtrees.push_front(vec![bytes]); +// // to_compare.push(vec![bytes]); +// // } +// // +// // while let Some(path) = subtrees.pop_front() { +// // let mut batch = Vec::new(); +// // +// // if path.len() < HEIGHT { +// // for _ in 0..SUBTREES_FOR_EACH { +// // let mut bytes = [0; 8]; +// // rng.fill_bytes(&mut bytes); +// // +// // batch.push(GroveDbOp::insert_op( +// // path.iter().map(|x| x.to_vec()).collect(), +// // bytes.to_vec(), +// // Element::empty_tree(), +// // )); +// // +// // let mut new_path = path.clone(); +// // new_path.push(bytes); +// 
// subtrees.push_front(new_path.clone()); +// // to_compare.push(new_path.clone()); +// // } +// // } +// // +// // for _ in 0..SCALARS_FOR_EACH { +// // let mut bytes = [0; 8]; +// // let mut bytes_val = vec![]; +// // rng.fill_bytes(&mut bytes); +// // rng.fill_bytes(&mut bytes_val); +// // +// // batch.push(GroveDbOp::insert_op( +// // path.iter().map(|x| x.to_vec()).collect(), +// // bytes.to_vec(), +// // Element::new_item(bytes_val), +// // )); +// // +// // let mut new_path = path.clone(); +// // new_path.push(bytes); +// // to_compare.push(new_path.clone()); +// // } +// // +// // db.apply_batch(batch, None, None).unwrap().unwrap(); +// // } +// // +// // test_replication(&db, to_compare.iter().map(|x| x.as_slice())); +// // } +// // +// // #[test] +// // fn replicate_from_checkpoint() { +// // Create a simple GroveDb first +// // let db = make_test_grovedb(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayya".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .unwrap(); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key2", +// // Element::new_item(b"ayyb".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .unwrap(); +// // +// // Save its state with checkpoint +// // let checkpoint_dir_parent = TempDir::new().unwrap(); +// // let checkpoint_dir = checkpoint_dir_parent.path().join("cp"); +// // db.create_checkpoint(&checkpoint_dir).unwrap(); +// // +// // Alter the db to make difference between current state and checkpoint +// // db.delete(&[TEST_LEAF], b"key1", None, None) +// // .unwrap() +// // .unwrap(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key3", +// // Element::new_item(b"ayyd".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .unwrap(); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key2", +// // Element::new_item(b"ayyc".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .unwrap(); +// // +// // let checkpoint_db = GroveDb::open(&checkpoint_dir).unwrap(); +// // +// // Ensure checkpoint differs from current state +// // assert_ne!( +// // checkpoint_db +// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // db.get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // ); +// // +// // Build a replica from checkpoint +// // let replica_dir = replicate(&checkpoint_db); +// // let replica_db = GroveDb::open(&replica_dir).unwrap(); +// // +// // assert_eq!( +// // checkpoint_db.root_hash(None).unwrap().unwrap(), +// // replica_db.root_hash(None).unwrap().unwrap() +// // ); +// // +// // assert_eq!( +// // checkpoint_db +// // .get(&[TEST_LEAF], b"key1", None) +// // .unwrap() +// // .unwrap(), +// // replica_db +// // .get(&[TEST_LEAF], b"key1", None) +// // .unwrap() +// // .unwrap(), +// // ); +// // assert_eq!( +// // checkpoint_db +// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // replica_db +// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // ); +// // assert!(matches!( +// // replica_db.get(&[TEST_LEAF], b"key3", None).unwrap(), +// // Err(Error::PathKeyNotFound(_)) +// // )); +// // +// // Drop original db and checkpoint dir too to ensure there is no dependency +// // drop(db); +// // drop(checkpoint_db); +// // drop(checkpoint_dir); +// // +// // assert_eq!( +// // replica_db +// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // 
Element::new_item(b"ayyb".to_vec()) +// // ); +// // } +// // } diff --git a/merk/src/error.rs b/merk/src/error.rs index 96717391..7581ba6f 100644 --- a/merk/src/error.rs +++ b/merk/src/error.rs @@ -59,6 +59,10 @@ pub enum Error { #[error("corrupted code execution error {0}")] CorruptedCodeExecution(&'static str), + /// Corrupted state + #[error("corrupted state: {0}")] + CorruptedState(&'static str), + /// Chunking error #[error("chunking error {0}")] ChunkingError(ChunkError), diff --git a/merk/src/lib.rs b/merk/src/lib.rs index b780b6f4..5a858dfc 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -38,7 +38,7 @@ extern crate core; mod merk; #[cfg(feature = "full")] -pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions, restore::Restorer}; +pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions}; /// Provides a container type that allows temporarily taking ownership of a /// value. diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 0df9655a..51521ced 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -26,475 +26,1047 @@ // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. -//! Provides `ChunkProducer`, which creates chunk proofs for full replication of -//! a Merk. +use std::{ + cmp::max, + collections::{LinkedList, VecDeque}, + path::Iter, +}; -#[cfg(feature = "full")] -use grovedb_costs::CostsExt; -#[cfg(feature = "full")] -use grovedb_storage::{RawIterator, StorageContext}; +use ed::Encode; +use grovedb_costs::{CostResult, CostsExt, OperationCost}; +use grovedb_storage::StorageContext; +use integer_encoding::VarInt; -#[cfg(feature = "full")] -use super::Merk; -#[cfg(feature = "full")] use crate::{ error::Error, - proofs::{chunk::get_next_chunk, Node, Op}, + proofs::{ + chunk::{ + chunk_op::ChunkOp, + error::{ChunkError, ChunkError::InternalError}, + util::{ + chunk_height, chunk_id_from_traversal_instruction, + chunk_id_from_traversal_instruction_with_recovery, generate_traversal_instruction, + generate_traversal_instruction_as_string, number_of_chunks, + string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, + }, + }, + Node, Op, + }, + tree::RefWalker, + Error::ChunkingError, + Merk, PanicSource, }; -#[cfg(feature = "full")] +/// ChunkProof for replication of a single subtree +#[derive(Debug)] +pub struct SubtreeChunk { + chunk: Vec, + next_index: Option, + remaining_limit: Option, +} + +impl SubtreeChunk { + pub fn new(chunk: Vec, next_index: Option, remaining_limit: Option) -> Self { + Self { + chunk, + next_index, + remaining_limit, + } + } +} + +/// ChunkProof for the replication of multiple subtrees. +#[derive(Debug)] +pub struct MultiChunk { + pub chunk: Vec, + pub next_index: Option, + pub remaining_limit: Option, +} + +impl MultiChunk { + pub fn new( + chunk: Vec, + next_index: Option, + remaining_limit: Option, + ) -> Self { + Self { + chunk, + next_index, + remaining_limit, + } + } +} + /// A `ChunkProducer` allows the creation of chunk proofs, used for trustlessly /// replicating entire Merk trees. Chunks can be generated on the fly in a /// random order, or iterated in order for slightly better performance. 
-pub struct ChunkProducer<'db, S: StorageContext<'db>> { - trunk: Vec, - chunk_boundaries: Vec>, - raw_iter: S::RawIterator, +pub struct ChunkProducer<'db, S> { + /// Represents the max height of the Merk tree + height: usize, + /// Represents the index of the next chunk index: usize, + merk: &'db Merk, } -#[cfg(feature = "full")] impl<'db, S> ChunkProducer<'db, S> where S: StorageContext<'db>, { - /// Creates a new `ChunkProducer` for the given `Merk` instance. In the - /// constructor, the first chunk (the "trunk") will be created. - pub fn new(merk: &Merk) -> Result { - let (trunk, has_more) = merk - .walk(|maybe_walker| match maybe_walker { - Some(mut walker) => walker.create_trunk_proof(), - None => Ok((vec![], false)).wrap_with_cost(Default::default()), - }) - .unwrap()?; - - let chunk_boundaries = if has_more { - trunk - .iter() - .filter_map(|op| match op { - Op::Push(Node::KVValueHashFeatureType(key, ..)) => Some(key.clone()), - _ => None, - }) - .collect() - } else { - vec![] - }; - - let mut raw_iter = merk.storage.raw_iter(); - raw_iter.seek_to_first().unwrap(); - - Ok(ChunkProducer { - trunk, - chunk_boundaries, - raw_iter, - index: 0, + /// Creates a new `ChunkProducer` for the given `Merk` instance + pub(crate) fn new(merk: &'db Merk) -> Result { + let tree_height = merk + .height() + .ok_or(Error::ChunkingError(ChunkError::EmptyTree( + "cannot create chunk producer for empty Merk", + )))?; + Ok(Self { + height: tree_height as usize, + index: 1, + merk, }) } /// Gets the chunk with the given index. Errors if the index is out of /// bounds or the tree is empty - the number of chunks can be checked by /// calling `producer.len()`. - pub fn chunk(&mut self, index: usize) -> Result, Error> { - if index >= self.len() { - return Err(Error::OldChunkingError("Chunk index out-of-bounds")); + pub fn chunk_with_index( + &mut self, + chunk_index: usize, + ) -> Result<(Vec, Option), Error> { + let traversal_instructions = generate_traversal_instruction(self.height, chunk_index)?; + self.chunk_internal(chunk_index, traversal_instructions) + } + + /// Returns the chunk at a given chunk id. 
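// Note on identifiers: a chunk id is the string encoding of the traversal
// instruction leading to the chunk's root (the root chunk has the empty
// instruction), while a chunk index is the 1-based position of the chunk in
// the tree-wide ordering; `string_as_traversal_instruction` and
// `generate_traversal_instruction_as_string` convert between the two forms.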
+ pub fn chunk(&mut self, chunk_id: &str) -> Result<(Vec, Option), Error> { + let traversal_instructions = string_as_traversal_instruction(chunk_id)?; + let chunk_index = chunk_id_from_traversal_instruction_with_recovery( + traversal_instructions.as_slice(), + self.height, + )?; + let (chunk, next_index) = self.chunk_internal(chunk_index, traversal_instructions)?; + let index_string = next_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose()?; + Ok((chunk, index_string)) + } + + /// Returns the chunk at the given index + /// Assumes index and traversal_instructions represents the same information + fn chunk_internal( + &mut self, + index: usize, + traversal_instructions: Vec, + ) -> Result<(Vec, Option), Error> { + // ensure that the chunk index is within bounds + let max_chunk_index = self.len(); + if index < 1 || index > max_chunk_index { + return Err(ChunkingError(ChunkError::OutOfBounds( + "chunk index out of bounds", + ))); } - self.index = index; + self.index = index + 1; - if index == 0 || index == 1 { - self.raw_iter.seek_to_first().unwrap(); + let chunk_height = chunk_height(self.height, index).unwrap(); + + let chunk = self.merk.walk(|maybe_walker| match maybe_walker { + Some(mut walker) => { + walker.traverse_and_build_chunk(&traversal_instructions, chunk_height) + } + None => Err(Error::ChunkingError(ChunkError::EmptyTree( + "cannot create chunk producer for empty Merk", + ))), + })?; + + // now we need to return the next index + // how do we know if we should return some or none + if self.index > max_chunk_index { + Ok((chunk, None)) } else { - let preceding_key = self.chunk_boundaries.get(index - 2).unwrap(); - self.raw_iter.seek(preceding_key).unwrap(); - self.raw_iter.next().unwrap(); + Ok((chunk, Some(self.index))) } + } - self.next_chunk() + /// Generate multichunk with chunk id + /// Multichunks accumulate as many chunks as they can until they have all + /// chunks or hit some optional limit + pub fn multi_chunk_with_limit( + &mut self, + chunk_id: &str, + limit: Option, + ) -> Result { + // we want to convert the chunk id to the index + let chunk_index = string_as_traversal_instruction(chunk_id).and_then(|instruction| { + chunk_id_from_traversal_instruction(instruction.as_slice(), self.height) + })?; + self.multi_chunk_with_limit_and_index(chunk_index, limit) } - /// Returns the total number of chunks for the underlying Merk tree. - #[allow(clippy::len_without_is_empty)] - pub fn len(&self) -> usize { - let boundaries_len = self.chunk_boundaries.len(); - if boundaries_len == 0 { - 1 - } else { - boundaries_len + 2 + /// Generate multichunk with chunk index + /// Multichunks accumulate as many chunks as they can until they have all + /// chunks or hit some optional limit + pub fn multi_chunk_with_limit_and_index( + &mut self, + index: usize, + limit: Option, + ) -> Result { + // TODO: what happens if the vec is filled? + // we need to have some kind of hardhoc limit value if none is supplied. + // maybe we can just do something with the length to fix this? 
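// The accumulation loop below interleaves ChunkOp::ChunkId / ChunkOp::Chunk
// pairs: for each subtree it first charges the encoded size of the chunk id
// against the remaining limit, then packs as many of that subtree's chunks as
// the rest of the limit allows, stopping once every chunk is included or no
// further progress is possible.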
+ let mut chunk = vec![]; + + let mut current_index = Some(index); + let mut current_limit = limit; + + // generate as many subtree chunks as we can + // until we have exhausted all or hit a limit restriction + while current_index != None { + let current_index_traversal_instruction = generate_traversal_instruction( + self.height, + current_index.expect("confirmed is Some"), + )?; + let chunk_id_op = ChunkOp::ChunkId(current_index_traversal_instruction); + + // factor in the ChunkId encoding length in limit calculations + let temp_limit = if let Some(limit) = current_limit { + let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("cannot get encoding length")) + })?; + if limit >= chunk_id_op_encoding_len { + Some(limit - chunk_id_op_encoding_len) + } else { + Some(0) + } + } else { + None + }; + + let subtree_multi_chunk_result = self.subtree_multi_chunk_with_limit( + current_index.expect("confirmed is not None"), + temp_limit, + ); + + let limit_too_small_error = matches!( + subtree_multi_chunk_result, + Err(ChunkingError(ChunkError::LimitTooSmall(..))) + ); + + if limit_too_small_error { + if chunk.is_empty() { + // no progress, return limit too small error + return Err(Error::ChunkingError(ChunkError::LimitTooSmall( + "limit too small for initial chunk", + ))); + } else { + // made progress, send accumulated chunk + break; + } + } + + let subtree_multi_chunk = subtree_multi_chunk_result?; + + chunk.push(chunk_id_op); + chunk.push(ChunkOp::Chunk(subtree_multi_chunk.chunk)); + + // update loop parameters + current_index = subtree_multi_chunk.next_index; + current_limit = subtree_multi_chunk.remaining_limit; } + + let index_string = current_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose()?; + + Ok(MultiChunk::new(chunk, index_string, current_limit)) } - /// Gets the next chunk based on the `ChunkProducer`'s internal index state. - /// This is mostly useful for letting `ChunkIter` yield the chunks in order, - /// optimizing throughput compared to random access. - fn next_chunk(&mut self) -> Result, Error> { - if self.index == 0 { - if self.trunk.is_empty() { - return Err(Error::OldChunkingError( - "Attempted to fetch chunk on empty tree", - )); + /// Packs as many chunks as it can from a starting chunk index, into a + /// vector. Stops when we have exhausted all chunks or we have reached + /// some limit. 
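// Sketch of the strategy used here: starting from the chunk at `index`, every
// Op::Push(Node::Hash(..)) in the accumulated ops marks a pruned subtree; it
// is spliced out and replaced in place by that subtree's own chunk for as long
// as the running encoded byte length stays within the limit, and the first
// chunk that does not fit is reported through the returned next index.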
+ fn subtree_multi_chunk_with_limit( + &mut self, + index: usize, + limit: Option, + ) -> Result { + let mut chunk_byte_length = 0; + + let max_chunk_index = number_of_chunks(self.height); + let mut chunk_index = index; + + // we first get the chunk at the given index + // TODO: use the returned chunk index rather than tracking + let (chunk_ops, _) = self.chunk_with_index(chunk_index)?; + chunk_byte_length = chunk_ops.encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })?; + chunk_index += 1; + + let mut chunk = VecDeque::from(chunk_ops); + + // ensure the limit is not less than first chunk byte length + // if it is we can't proceed and didn't make progress so we return an error + if let Some(limit) = limit { + if chunk_byte_length > limit { + return Err(Error::ChunkingError(ChunkError::LimitTooSmall( + "limit too small for initial chunk", + ))); } - self.index += 1; - return Ok(self.trunk.clone()); } - if self.index >= self.len() { - panic!("Called next_chunk after end"); + let mut iteration_index = 0; + while iteration_index < chunk.len() { + // we only perform replacements on Hash nodes + if matches!(chunk[iteration_index], Op::Push(Node::Hash(..))) { + // TODO: use the returned chunk index rather than tracking + let (replacement_chunk, _) = self.chunk_with_index(chunk_index)?; + + // calculate the new total + let new_total = replacement_chunk.encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })? + chunk_byte_length + - chunk[iteration_index].encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })?; + + // verify that this chunk doesn't make use exceed the limit + if let Some(limit) = limit { + if new_total > limit { + let next_index = match chunk_index > max_chunk_index { + true => None, + _ => Some(chunk_index), + }; + + return Ok(SubtreeChunk::new( + chunk.into(), + next_index, + Some(limit - chunk_byte_length), + )); + } + } + + chunk_byte_length = new_total; + chunk_index += 1; + + chunk.remove(iteration_index); + for op in replacement_chunk.into_iter().rev() { + chunk.insert(iteration_index, op); + } + } else { + iteration_index += 1; + } } - let end_key = self.chunk_boundaries.get(self.index - 1); - let end_key_slice = end_key.as_ref().map(|k| k.as_slice()); + let remaining_limit = limit.map(|l| l - chunk_byte_length); + let next_index = match chunk_index > max_chunk_index { + true => None, + _ => Some(chunk_index), + }; - self.index += 1; + Ok(SubtreeChunk::new(chunk.into(), next_index, remaining_limit)) + } - get_next_chunk(&mut self.raw_iter, end_key_slice).unwrap() + /// Returns the total number of chunks for the underlying Merk tree. + pub fn len(&self) -> usize { + number_of_chunks(self.height as usize) } -} -#[cfg(feature = "full")] -impl<'db, S> IntoIterator for ChunkProducer<'db, S> -where - S: StorageContext<'db>, -{ - type IntoIter = ChunkIter<'db, S>; - type Item = as Iterator>::Item; + /// Gets the next chunk based on the `ChunkProducer`'s internal index state. + /// This is mostly useful for letting `ChunkIter` yield the chunks in order, + /// optimizing throughput compared to random access. 
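// An illustrative iteration sketch, assuming a populated `merk`: the producer
// itself is the iterator, and each item pairs the chunk ops with the id of the
// chunk that should be requested next (None once all chunks are exhausted).
//
//     for item in merk.chunks()? {
//         let (ops, next_chunk_id) = item?;
//         // ship `ops` to the replica
//     }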
+ // TODO: this is not better than random access, as we are not keeping state + // that will make this more efficient, decide if this should be fixed or not + fn next_chunk(&mut self) -> Option, Option), Error>> { + let max_index = number_of_chunks(self.height); + if self.index > max_index { + return None; + } + + // get the chunk at the given index + // return the next index as a string + Some( + self.chunk_with_index(self.index) + .and_then(|(chunk, chunk_index)| { + chunk_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose() + .and_then(|v| Ok((chunk, v))) + }), + ) + } - fn into_iter(self) -> Self::IntoIter { - ChunkIter(self) + // TODO: test this logic out + fn get_chunk_encoding_length(chunk: &[Op]) -> usize { + // TODO: deal with error + chunk + .iter() + .fold(0, |sum, op| sum + op.encoding_length().unwrap()) } } -#[cfg(feature = "full")] -/// A `ChunkIter` iterates through all the chunks for the underlying `Merk` -/// instance in order (the first chunk is the "trunk" chunk). Yields `None` -/// after all chunks have been yielded. -pub struct ChunkIter<'db, S>(ChunkProducer<'db, S>) -where - S: StorageContext<'db>; - -#[cfg(feature = "full")] -impl<'db, S> Iterator for ChunkIter<'db, S> +/// Iterate over each chunk, returning `None` after last chunk +impl<'db, S> Iterator for ChunkProducer<'db, S> where S: StorageContext<'db>, { - type Item = Result, Error>; - - fn size_hint(&self) -> (usize, Option) { - (self.0.len(), Some(self.0.len())) - } + type Item = Result<(Vec, Option), Error>; fn next(&mut self) -> Option { - if self.0.index >= self.0.len() { - None - } else { - Some(self.0.next_chunk()) - } + self.next_chunk() } } -#[cfg(feature = "full")] impl<'db, S> Merk where S: StorageContext<'db>, { /// Creates a `ChunkProducer` which can return chunk proofs for replicating /// the entire Merk tree. - pub fn chunks_old(&self) -> Result, Error> { + pub fn chunks(&'db self) -> Result, Error> { ChunkProducer::new(self) } } -#[cfg(feature = "full")] #[cfg(test)] -mod tests { - use grovedb_path::SubtreePath; - use grovedb_storage::{rocksdb_storage::RocksDbStorage, Storage, StorageBatch}; - use tempfile::TempDir; - +mod test { use super::*; use crate::{ - proofs::chunk::{verify_leaf, verify_trunk}, - test_utils::*, + proofs::{ + chunk::chunk::{ + tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, + LEFT, RIGHT, + }, + tree::execute, + Tree, + }, + test_utils::{make_batch_seq, TempMerk}, }; - #[test] - fn len_small() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..256); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + #[derive(Default)] + struct NodeCounts { + hash: usize, + kv_hash: usize, + kv: usize, + kv_value_hash: usize, + kv_digest: usize, + kv_ref_value_hash: usize, + kv_value_hash_feature_type: usize, + } - let chunks = merk.chunks_old().unwrap(); - assert_eq!(chunks.len(), 1); - assert_eq!(chunks.into_iter().size_hint().0, 1); + impl NodeCounts { + fn sum(&self) -> usize { + return self.hash + + self.kv_hash + + self.kv + + self.kv_value_hash + + self.kv_digest + + self.kv_ref_value_hash + + self.kv_value_hash_feature_type; + } + } + + fn count_node_types(tree: Tree) -> NodeCounts { + let mut counts = NodeCounts::default(); + + tree.visit_nodes(&mut |node| { + match node { + Node::Hash(_) => counts.hash += 1, + Node::KVHash(_) => counts.kv_hash += 1, + Node::KV(..) => counts.kv += 1, + Node::KVValueHash(..) => counts.kv_value_hash += 1, + Node::KVDigest(..) 
=> counts.kv_digest += 1, + Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, + Node::KVValueHashFeatureType(..) => counts.kv_value_hash_feature_type += 1, + }; + }); + + counts } #[test] - fn len_big() { + fn test_merk_chunk_len() { + // Tree of height 5 - max of 31 elements, min of 16 elements + // 5 will be broken into 2 layers = [3, 2] + // exit nodes from first layer = 2^3 = 8 + // total_chunk = 1 + 8 = 9 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..10_000); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + let batch = make_batch_seq(0..20); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + let chunk_producer = ChunkProducer::new(&merk).unwrap(); + assert_eq!(chunk_producer.len(), 9); - let chunks = merk.chunks_old().unwrap(); - assert_eq!(chunks.len(), 129); - assert_eq!(chunks.into_iter().size_hint().0, 129); + // Tree of height 10 - max of 1023 elements, min of 512 elements + // 4 layers -> [3,3,2,2] + // chunk_count_per_layer -> [1, 8, 64, 256] + // total = 341 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..1000); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(10)); + let chunk_producer = ChunkProducer::new(&merk).unwrap(); + assert_eq!(chunk_producer.len(), 329); } #[test] - fn generate_and_verify_chunks() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..10_000); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + fn test_chunk_producer_iter() { + // tree with height 4 + // full tree + // 7 + // / \ + // 3 11 + // / \ / \ + // 1 5 9 13 + // / \ / \ / \ / \ + // 0 2 4 6 8 10 12 14 + // going to be broken into [2, 2] + // that's a total of 5 chunks - let mut chunks = merk.chunks_old().unwrap().into_iter().map(|x| x.unwrap()); + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - let chunk = chunks.next().unwrap(); - let (trunk, height) = verify_trunk(chunk.into_iter().map(Ok)).unwrap().unwrap(); - assert_eq!(height, 14); - assert_eq!(trunk.hash().unwrap(), merk.root_hash().unwrap()); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - assert_eq!(trunk.layer(7).count(), 128); + // build iterator from first chunk producer + let mut chunks = merk.chunks().expect("should return producer"); - for (ops, node) in chunks.zip(trunk.layer(height / 2)) { - verify_leaf(ops.into_iter().map(Ok), node.hash().unwrap()) - .unwrap() - .unwrap(); + // ensure that the chunks gotten from the iterator is the same + // as that from the chunk producer + for i in 1..=5 { + assert_eq!( + chunks.next().unwrap().unwrap().0, + chunk_producer.chunk_with_index(i).unwrap().0 + ); } + + // returns None after max + assert_eq!(chunks.next().is_none(), true); } #[test] - fn chunks_from_reopen() { - let tmp_dir = TempDir::new().expect("cannot create tempdir"); - let original_chunks = { - let storage = RocksDbStorage::default_rocksdb_with_path(tmp_dir.path()) - .expect("cannot open rocksdb storage"); - let batch = StorageBatch::new(); - let mut merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), Some(&batch)) - .unwrap(), - false, - ) + fn test_random_chunk_access() { + // tree with height 4 + // full tree + // 7 + // / \ + // 3 
11 + // / \ / \ + // 1 5 9 13 + // / \ / \ / \ / \ + // 0 2 4 6 8 10 12 14 + // going to be broken into [2, 2] + // that's a total of 5 chunks + + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) .unwrap() - .unwrap(); - let merk_batch = make_batch_seq(1..10); - merk.apply::<_, Vec<_>>(&merk_batch, &[], None) - .unwrap() - .unwrap(); + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - storage - .commit_multi_context_batch(batch, None) - .unwrap() - .expect("cannot commit batch"); + let mut inner_tree = merk.tree.take().expect("has inner tree"); + merk.tree.set(Some(inner_tree.clone())); - let merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), None) - .unwrap(), - false, - ) - .unwrap() - .unwrap(); + // TODO: should I be using panic source? + let mut tree_walker = RefWalker::new(&mut inner_tree, PanicSource {}); - merk.chunks_old() - .unwrap() - .into_iter() - .map(|x| x.unwrap()) - .collect::>() - .into_iter() - }; - let storage = RocksDbStorage::default_rocksdb_with_path(tmp_dir.path()) - .expect("cannot open rocksdb storage"); - let merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), None) - .unwrap(), - false, - ) - .unwrap() - .unwrap(); - let reopen_chunks = merk.chunks_old().unwrap().into_iter().map(|x| x.unwrap()); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + assert_eq!(chunk_producer.len(), 5); - for (original, checkpoint) in original_chunks.zip(reopen_chunks) { - assert_eq!(original.len(), checkpoint.len()); - } - } + // assert bounds + assert_eq!(chunk_producer.chunk_with_index(0).is_err(), true); + assert_eq!(chunk_producer.chunk_with_index(6).is_err(), true); - // #[test] - // fn chunks_from_checkpoint() { - // let mut merk = TempMerk::new(); - // let batch = make_batch_seq(1..10); - // merk.apply(batch.as_slice(), &[]).unwrap(); + // first chunk + // expected: + // 7 + // / \ + // 3 11 + // / \ / \ + // H(1) H(5) H(9) H(13) + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(1) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 13); + assert_eq!(next_chunk, Some(2)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])), + Op::Child, + Op::Child + ] + ); - // let path: std::path::PathBuf = - // "generate_and_verify_chunks_from_checkpoint.db".into(); if path. 
- // exists() { std::fs::remove_dir_all(&path).unwrap(); - // } - // let checkpoint = merk.checkpoint(&path).unwrap(); + // second chunk + // expected: + // 1 + // / \ + // 0 2 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(2) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(3)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT, RIGHT] + )), + Op::Child + ] + ); - // let original_chunks = - // merk.chunks().unwrap().into_iter().map(Result::unwrap); - // let checkpoint_chunks = - // checkpoint.chunks().unwrap().into_iter().map(Result::unwrap); + // third chunk + // expected: + // 5 + // / \ + // 4 6 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(3) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(4)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT, RIGHT] + )), + Op::Child + ] + ); - // for (original, checkpoint) in original_chunks.zip(checkpoint_chunks) { - // assert_eq!(original.len(), checkpoint.len()); - // } + // third chunk + // expected: + // 9 + // / \ + // 8 10 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(4) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(5)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child + ] + ); - // std::fs::remove_dir_all(&path).unwrap(); - // } + // third chunk + // expected: + // 13 + // / \ + // 12 14 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(5) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, None); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child + ] + ); + } #[test] - fn random_access_chunks() { + fn test_subtree_chunk_no_limit() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..111); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + // generate multi chunk with no limit + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, None) + .expect("should generate chunk with limit"); - let chunks = merk - .chunks_old() + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + let tree = 
execute(chunk_result.chunk.into_iter().map(Ok), false, |_| Ok(())) .unwrap() - .into_iter() - .map(|x| x.unwrap()) - .collect::>(); - - let mut producer = merk.chunks_old().unwrap(); - for i in 0..chunks.len() * 2 { - let index = i % chunks.len(); - assert_eq!(producer.chunk(index).unwrap(), chunks[index]); - } + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + // assert that all nodes are of type kv_value_hash_feature_type + let node_counts = count_node_types(tree); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.kv_hash, 0); + assert_eq!(node_counts.kv, 0); + assert_eq!(node_counts.kv_value_hash, 0); + assert_eq!(node_counts.kv_digest, 0); + assert_eq!(node_counts.kv_ref_value_hash, 0); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); } #[test] - #[should_panic(expected = "Attempted to fetch chunk on empty tree")] - fn test_chunk_empty() { - let merk = TempMerk::new(); + fn test_subtree_chunk_with_limit() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - let _chunks = merk - .chunks_old() + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // initial chunk is of size 453, so limit of 10 is too small + // should return an error + let chunk = chunk_producer.subtree_multi_chunk_with_limit(1, Some(10)); + assert!(chunk.is_err()); + + // get just the fist chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(453)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(2)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 453); + assert_eq!(chunk.len(), 13); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 3); + assert_eq!(node_counts.hash, 4); + assert_eq!(node_counts.sum(), 4 + 3); + + // get up to second chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(737)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(3)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 737); + assert_eq!(chunk.len(), 17); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) .unwrap() - .into_iter() - .map(|x| x.unwrap()) - .collect::>(); + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 6); + assert_eq!(node_counts.hash, 3); + assert_eq!(node_counts.sum(), 6 + 3); + + // get up to third chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1021)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(4)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1021); + assert_eq!(chunk.len(), 21); // op count + let tree = 
execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 9); + assert_eq!(node_counts.hash, 2); + assert_eq!(node_counts.sum(), 9 + 2); + + // get up to fourth chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1305)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(5)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1305); + assert_eq!(chunk.len(), 25); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 12); + assert_eq!(node_counts.hash, 1); + assert_eq!(node_counts.sum(), 12 + 1); + + // get up to fifth chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1589)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, None); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1589); + assert_eq!(chunk.len(), 29); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.sum(), 15); + + // limit larger than total chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(usize::MAX)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026)); + assert_eq!(chunk_result.next_index, None); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1589); + assert_eq!(chunk.len(), 29); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.sum(), 15); } #[test] - #[should_panic(expected = "Chunk index out-of-bounds")] - fn test_chunk_index_oob() { + fn test_multi_chunk_with_no_limit_trunk() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..42); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - let mut producer = merk.chunks_old().unwrap(); - let _chunk = producer.chunk(50000).unwrap(); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk starting from index 1, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(1, None) + .expect("should generate chunk with limit"); + + 
assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // should only contain 2 items, the starting chunk id and the entire tree + assert_eq!(chunk_result.chunk.len(), 2); + + // assert items + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![])); + if let ChunkOp::Chunk(chunk) = &chunk_result.chunk[1] { + let tree = execute(chunk.clone().into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + } else { + panic!("expected ChunkOp::Chunk"); + } } - // #[test] - // fn test_chunk_index_gt_1_access() { - // let mut merk = TempMerk::new(); - // let batch = make_batch_seq(1..513); - // merk.apply::<_, Vec<_>>(&batch, &[]).unwrap().unwrap(); - - // let mut producer = merk.chunks().unwrap(); - // println!("length: {}", producer.len()); - // let chunk = producer.chunk(2).unwrap(); - // assert_eq!( - // chunk, - // vec![ - // 3, 8, 0, 0, 0, 0, 0, 0, 0, 18, 0, 60, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 3, 8, 0, 0, 0, 0, 0, 0, 0, 19, 0, 60, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 20, 0, 60, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 17, 3, 8, 0, 0, 0, 0, 0, 0, 0, 21, 0, 60, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 22, - // 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 23, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 60, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 17, 17, 3, 
8, 0, 0, 0, 0, 0, 0, 0, 25, 0, - // 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 26, 0, 60, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 3, 8, 0, 0, 0, 0, 0, 0, 0, 27, 0, 60, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, - // 0, 0, 0, 28, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 17, 3, 8, 0, 0, 0, 0, 0, 0, 0, 29, 0, 60, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, - // 30, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 31, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 32, 0, 60, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 17, 17, 17 - // ] - // ); - // } + #[test] + fn test_multi_chunk_with_no_limit_not_trunk() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk starting from index 2, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(2, None) + 
.expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // chunk 2 - 5 will be considered separate subtrees + // each will have an accompanying chunk id, so 8 elements total + assert_eq!(chunk_result.chunk.len(), 8); + + // assert the chunk id's + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); + assert_eq!(chunk_result.chunk[4], ChunkOp::ChunkId(vec![RIGHT, LEFT])); + assert_eq!(chunk_result.chunk[6], ChunkOp::ChunkId(vec![RIGHT, RIGHT])); + + // assert the chunks + assert_eq!( + chunk_result.chunk[1], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(2) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[3], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(3) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[5], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(4) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[7], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(5) + .expect("should generate chunk") + .0 + ) + ); + } #[test] - #[should_panic(expected = "Called next_chunk after end")] - fn test_next_chunk_index_oob() { + fn test_multi_chunk_with_limit() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..42); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // ensure that the remaining limit, next index and values given are correct + // if limit is smaller than first chunk, we should get an error + let chunk_result = chunk_producer.multi_chunk_with_limit("", Some(5)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); + + // get chunk 2 + // data size of chunk 2 is exactly 317 + // chunk op encoding for chunk 2 = 321 + // hence limit of 317 will be insufficient + let chunk_result = chunk_producer.multi_chunk_with_limit_and_index(2, Some(317)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); - let mut producer = merk.chunks_old().unwrap(); - let _chunk1 = producer.next_chunk(); - let _chunk2 = producer.next_chunk(); + // get chunk 2 and 3 + // chunk 2 chunk op = 331 + // chunk 3 chunk op = 321 + // padding = 5 + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(2, Some(321 + 321 + 5)) + .expect("should generate chunk"); + assert_eq!( + chunk_result.next_index, + Some(traversal_instruction_as_string( + &generate_traversal_instruction(4, 4).unwrap() + )) + ); + assert_eq!(chunk_result.remaining_limit, Some(5)); + assert_eq!(chunk_result.chunk.len(), 4); + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); } } diff --git a/merk/src/merk/chunks2.rs b/merk/src/merk/chunks2.rs deleted file mode 100644 index d455dbde..00000000 --- a/merk/src/merk/chunks2.rs +++ /dev/null @@ -1,943 +0,0 @@ -// TODO: add MIT License -// TODO: add module description -// TODO: figure out verification features - -use std::{ - cmp::max, - collections::{LinkedList, VecDeque}, - 
path::Iter, -}; - -use ed::Encode; -use grovedb_costs::{CostResult, CostsExt, OperationCost}; -use grovedb_storage::StorageContext; -use integer_encoding::VarInt; - -use crate::{ - error::Error, - proofs::{ - chunk::{ - chunk_op::ChunkOp, - error::{ChunkError, ChunkError::InternalError}, - util::{ - chunk_height, generate_traversal_instruction, number_of_chunks, - traversal_instruction_as_string, write_to_vec, - }, - }, - Node, Op, - }, - tree::RefWalker, - Error::ChunkingError, - Merk, PanicSource, -}; - -// TODO: move types to some other file -// TODO: add documentation -#[derive(Debug)] -pub struct SubtreeChunk { - chunk: Vec, - next_index: Option, - remaining_limit: Option, -} - -impl SubtreeChunk { - pub fn new(chunk: Vec, next_index: Option, remaining_limit: Option) -> Self { - Self { - chunk, - next_index, - remaining_limit, - } - } -} - -#[derive(Debug)] -pub struct MultiChunk { - pub chunk: Vec, - pub next_index: Option, - pub remaining_limit: Option, -} - -impl MultiChunk { - pub fn new( - chunk: Vec, - next_index: Option, - remaining_limit: Option, - ) -> Self { - Self { - chunk, - next_index, - remaining_limit, - } - } -} - -/// A `ChunkProducer` allows the creation of chunk proofs, used for trustlessly -/// replicating entire Merk trees. Chunks can be generated on the fly in a -/// random order, or iterated in order for slightly better performance. -pub struct ChunkProducer<'db, S> { - /// Represents the max height of the Merk tree - height: usize, - /// Represents the index of the next chunk - index: usize, - merk: &'db Merk, -} - -impl<'db, S> ChunkProducer<'db, S> -where - S: StorageContext<'db>, -{ - /// Creates a new `ChunkProducer` for the given `Merk` instance - pub(crate) fn new(merk: &'db Merk) -> Result { - let tree_height = merk - .height() - .ok_or(Error::ChunkingError(ChunkError::EmptyTree( - "cannot create chunk producer for empty Merk", - )))?; - Ok(Self { - height: tree_height as usize, - index: 1, - merk, - }) - } - - /// Gets the chunk with the given index. Errors if the index is out of - /// bounds or the tree is empty - the number of chunks can be checked by - /// calling `producer.len()`. - pub fn chunk(&mut self, index: usize) -> Result, Error> { - // ensure that the chunk index is within bounds - let max_chunk_index = self.len(); - if index < 1 || index > max_chunk_index { - return Err(ChunkingError(ChunkError::OutOfBounds( - "chunk index out of bounds", - ))); - } - - self.index = index + 1; - - let traversal_instructions = generate_traversal_instruction(self.height, index)?; - - let chunk_height = chunk_height(self.height, index).unwrap(); - - self.merk.walk(|maybe_walker| match maybe_walker { - Some(mut walker) => { - walker.traverse_and_build_chunk(&traversal_instructions, chunk_height) - } - None => Err(Error::ChunkingError(ChunkError::EmptyTree( - "cannot create chunk producer for empty Merk", - ))), - }) - } - - // TODO: add documentation - pub fn multi_chunk_with_limit( - &mut self, - index: usize, - limit: Option, - ) -> Result { - // TODO: what happens if the vec is filled? - // we need to have some kind of hardhoc limit value if none is supplied. - // maybe we can just do something with the length to fix this? 
- let mut chunk = vec![]; - - let mut current_index = Some(index); - let mut current_limit = limit; - - // generate as many subtree chunks as we can - // until we have exhausted all or hit a limit restriction - while current_index != None { - let current_index_traversal_instruction = generate_traversal_instruction( - self.height, - current_index.expect("confirmed is Some"), - )?; - let chunk_id_op = ChunkOp::ChunkId(current_index_traversal_instruction); - - // factor in the ChunkId encoding length in limit calculations - let temp_limit = if let Some(limit) = current_limit { - let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|e| { - Error::ChunkingError(ChunkError::InternalError("cannot get encoding length")) - })?; - if limit >= chunk_id_op_encoding_len { - Some(limit - chunk_id_op_encoding_len) - } else { - Some(0) - } - } else { - None - }; - - let subtree_multi_chunk_result = self.subtree_multi_chunk_with_limit( - current_index.expect("confirmed is not None"), - temp_limit, - ); - - let limit_too_small_error = matches!( - subtree_multi_chunk_result, - Err(ChunkingError(ChunkError::LimitTooSmall(..))) - ); - - if limit_too_small_error { - if chunk.is_empty() { - // no progress, return limit too small error - return Err(Error::ChunkingError(ChunkError::LimitTooSmall( - "limit too small for initial chunk", - ))); - } else { - // made progress, send accumulated chunk - break; - } - } - - let subtree_multi_chunk = subtree_multi_chunk_result?; - - chunk.push(chunk_id_op); - chunk.push(ChunkOp::Chunk(subtree_multi_chunk.chunk)); - - // update loop parameters - current_index = subtree_multi_chunk.next_index; - current_limit = subtree_multi_chunk.remaining_limit; - } - - Ok(MultiChunk::new(chunk, current_index, current_limit)) - } - - /// Packs as many chunks as it can from a starting chunk index, into a - /// vector. Stops when we have exhausted all chunks or we have reached - /// some limit. - pub fn subtree_multi_chunk_with_limit( - &mut self, - index: usize, - limit: Option, - ) -> Result { - let mut chunk_byte_length = 0; - - let max_chunk_index = number_of_chunks(self.height); - let mut chunk_index = index; - - // we first get the chunk at the given index - let chunk_ops = self.chunk(chunk_index)?; - chunk_byte_length = chunk_ops.encoding_length().map_err(|e| { - Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) - })?; - chunk_index += 1; - - let mut chunk = VecDeque::from(chunk_ops); - - // ensure the limit is not less than first chunk byte length - // if it is we can't proceed and didn't make progress so we return an error - if let Some(limit) = limit { - if chunk_byte_length > limit { - return Err(Error::ChunkingError(ChunkError::LimitTooSmall( - "limit too small for initial chunk", - ))); - } - } - - let mut iteration_index = 0; - while iteration_index < chunk.len() { - // we only perform replacements on Hash nodes - if matches!(chunk[iteration_index], Op::Push(Node::Hash(..))) { - let replacement_chunk = self.chunk(chunk_index)?; - - // calculate the new total - let new_total = replacement_chunk.encoding_length().map_err(|e| { - Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) - })? 
+ chunk_byte_length - - chunk[iteration_index].encoding_length().map_err(|e| { - Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) - })?; - - // verify that this chunk doesn't make use exceed the limit - if let Some(limit) = limit { - if new_total > limit { - let next_index = match chunk_index > max_chunk_index { - true => None, - _ => Some(chunk_index), - }; - - return Ok(SubtreeChunk::new( - chunk.into(), - next_index, - Some(limit - chunk_byte_length), - )); - } - } - - chunk_byte_length = new_total; - chunk_index += 1; - - chunk.remove(iteration_index); - for op in replacement_chunk.into_iter().rev() { - chunk.insert(iteration_index, op); - } - } else { - iteration_index += 1; - } - } - - let remaining_limit = limit.map(|l| l - chunk_byte_length); - let next_index = match chunk_index > max_chunk_index { - true => None, - _ => Some(chunk_index), - }; - - Ok(SubtreeChunk::new(chunk.into(), next_index, remaining_limit)) - } - - /// Returns the total number of chunks for the underlying Merk tree. - pub fn len(&self) -> usize { - number_of_chunks(self.height as usize) - } - - /// Gets the next chunk based on the `ChunkProducer`'s internal index state. - /// This is mostly useful for letting `ChunkIter` yield the chunks in order, - /// optimizing throughput compared to random access. - // TODO: does this really optimize throughput, how can you make the statement - // true? - fn next_chunk(&mut self) -> Option, Error>> { - // for now not better than random access - // TODO: fix - let max_index = number_of_chunks(self.height); - if self.index > max_index { - return None; - } - - let chunk = self.chunk(self.index); - - return Some(chunk); - } - - // TODO: test this logic out - fn get_chunk_encoding_length(chunk: &[Op]) -> usize { - // TODO: deal with error - chunk - .iter() - .fold(0, |sum, op| sum + op.encoding_length().unwrap()) - } -} - -/// Iterate over each chunk, returning `None` after last chunk -impl<'db, S> Iterator for ChunkProducer<'db, S> -where - S: StorageContext<'db>, -{ - type Item = Result, Error>; - - fn next(&mut self) -> Option { - self.next_chunk() - } -} - -impl<'db, S> Merk -where - S: StorageContext<'db>, -{ - /// Creates a `ChunkProducer` which can return chunk proofs for replicating - /// the entire Merk tree. - pub fn chunks(&'db self) -> Result, Error> { - ChunkProducer::new(self) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::{ - proofs::{ - chunk::chunk2::{ - tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, - LEFT, RIGHT, - }, - tree::execute, - Tree, - }, - test_utils::{make_batch_seq, TempMerk}, - }; - - #[derive(Default)] - struct NodeCounts { - hash: usize, - kv_hash: usize, - kv: usize, - kv_value_hash: usize, - kv_digest: usize, - kv_ref_value_hash: usize, - kv_value_hash_feature_type: usize, - } - - impl NodeCounts { - fn sum(&self) -> usize { - return self.hash - + self.kv_hash - + self.kv - + self.kv_value_hash - + self.kv_digest - + self.kv_ref_value_hash - + self.kv_value_hash_feature_type; - } - } - - fn count_node_types(tree: Tree) -> NodeCounts { - let mut counts = NodeCounts::default(); - - tree.visit_nodes(&mut |node| { - match node { - Node::Hash(_) => counts.hash += 1, - Node::KVHash(_) => counts.kv_hash += 1, - Node::KV(..) => counts.kv += 1, - Node::KVValueHash(..) => counts.kv_value_hash += 1, - Node::KVDigest(..) => counts.kv_digest += 1, - Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, - Node::KVValueHashFeatureType(..) 
=> counts.kv_value_hash_feature_type += 1, - }; - }); - - counts - } - - #[test] - fn test_merk_chunk_len() { - // Tree of height 5 - max of 31 elements, min of 16 elements - // 5 will be broken into 3 layers = [2, 2, 2] - // exit nodes from first layer = 2^2 = 4 - // exit nodes from the second layer = 4 ^ 2^2 = 16 - // total_chunk = 1 + 4 + 16 = 21 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..20); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(5)); - let chunk_producer = ChunkProducer::new(&merk).unwrap(); - assert_eq!(chunk_producer.len(), 21); - - // Tree of height 10 - max of 1023 elements, min of 512 elements - // 4 layers -> [2,2,2,2,2] - // chunk_count_per_layer -> [1, 4, 16, 64, 256] - // total = 341 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..1000); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(10)); - let chunk_producer = ChunkProducer::new(&merk).unwrap(); - assert_eq!(chunk_producer.len(), 341); - } - - #[test] - fn test_chunk_producer_iter() { - // tree with height 4 - // full tree - // 7 - // / \ - // 3 11 - // / \ / \ - // 1 5 9 13 - // / \ / \ / \ / \ - // 0 2 4 6 8 10 12 14 - // going to be broken into [2, 2] - // that's a total of 5 chunks - - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // build iterator from first chunk producer - let mut chunks = merk.chunks().expect("should return producer"); - - // ensure that the chunks gotten from the iterator is the same - // as that from the chunk producer - for i in 1..=5 { - assert_eq!( - chunks.next().unwrap().unwrap(), - chunk_producer.chunk(i).unwrap() - ); - } - - // returns None after max - assert_eq!(chunks.next().is_none(), true); - } - - #[test] - fn test_random_chunk_access() { - // tree with height 4 - // full tree - // 7 - // / \ - // 3 11 - // / \ / \ - // 1 5 9 13 - // / \ / \ / \ / \ - // 0 2 4 6 8 10 12 14 - // going to be broken into [2, 2] - // that's a total of 5 chunks - - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut inner_tree = merk.tree.take().expect("has inner tree"); - merk.tree.set(Some(inner_tree.clone())); - - // TODO: should I be using panic source? 
- let mut tree_walker = RefWalker::new(&mut inner_tree, PanicSource {}); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - assert_eq!(chunk_producer.len(), 5); - - // assert bounds - assert_eq!(chunk_producer.chunk(0).is_err(), true); - assert_eq!(chunk_producer.chunk(6).is_err(), true); - - // first chunk - // expected: - // 7 - // / \ - // 3 11 - // / \ / \ - // H(1) H(5) H(9) H(13) - let chunk = chunk_producer.chunk(1).expect("should generate chunk"); - assert_eq!(chunk.len(), 13); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])), - Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), - Op::Parent, - Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])), - Op::Child, - Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), - Op::Parent, - Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])), - Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), - Op::Parent, - Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])), - Op::Child, - Op::Child - ] - ); - - // second chunk - // expected: - // 1 - // / \ - // 0 2 - let chunk = chunk_producer.chunk(2).expect("should generate chunk"); - assert_eq!(chunk.len(), 5); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, LEFT, LEFT] - )), - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, LEFT] - )), - Op::Parent, - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, LEFT, RIGHT] - )), - Op::Child - ] - ); - - // third chunk - // expected: - // 5 - // / \ - // 4 6 - let chunk = chunk_producer.chunk(3).expect("should generate chunk"); - assert_eq!(chunk.len(), 5); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, RIGHT, LEFT] - )), - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, RIGHT] - )), - Op::Parent, - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, RIGHT, RIGHT] - )), - Op::Child - ] - ); - - // third chunk - // expected: - // 9 - // / \ - // 8 10 - let chunk = chunk_producer.chunk(4).expect("should generate chunk"); - assert_eq!(chunk.len(), 5); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, LEFT, LEFT] - )), - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, LEFT] - )), - Op::Parent, - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, LEFT, RIGHT] - )), - Op::Child - ] - ); - - // third chunk - // expected: - // 13 - // / \ - // 12 14 - let chunk = chunk_producer.chunk(5).expect("should generate chunk"); - assert_eq!(chunk.len(), 5); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, RIGHT, LEFT] - )), - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, RIGHT] - )), - Op::Parent, - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, RIGHT, RIGHT] - )), - Op::Child - ] - ); - } - - #[test] - fn test_subtree_chunk_no_limit() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - // generate multi chunk with no limit - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut chunk_result = 
chunk_producer - .subtree_multi_chunk_with_limit(1, None) - .expect("should generate chunk with limit"); - - assert_eq!(chunk_result.remaining_limit, None); - assert_eq!(chunk_result.next_index, None); - - let tree = execute(chunk_result.chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - // assert that all nodes are of type kv_value_hash_feature_type - let node_counts = count_node_types(tree); - assert_eq!(node_counts.hash, 0); - assert_eq!(node_counts.kv_hash, 0); - assert_eq!(node_counts.kv, 0); - assert_eq!(node_counts.kv_value_hash, 0); - assert_eq!(node_counts.kv_digest, 0); - assert_eq!(node_counts.kv_ref_value_hash, 0); - assert_eq!(node_counts.kv_value_hash_feature_type, 15); - } - - #[test] - fn test_subtree_chunk_with_limit() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // initial chunk is of size 453, so limit of 10 is too small - // should return an error - let chunk = chunk_producer.subtree_multi_chunk_with_limit(1, Some(10)); - assert!(chunk.is_err()); - - // get just the fist chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(453)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, Some(2)); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 453); - assert_eq!(chunk.len(), 13); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 3); - assert_eq!(node_counts.hash, 4); - assert_eq!(node_counts.sum(), 4 + 3); - - // get up to second chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(737)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, Some(3)); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 737); - assert_eq!(chunk.len(), 17); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 6); - assert_eq!(node_counts.hash, 3); - assert_eq!(node_counts.sum(), 6 + 3); - - // get up to third chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(1021)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, Some(4)); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 1021); - assert_eq!(chunk.len(), 21); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - 
assert_eq!(node_counts.kv_value_hash_feature_type, 9); - assert_eq!(node_counts.hash, 2); - assert_eq!(node_counts.sum(), 9 + 2); - - // get up to fourth chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(1305)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, Some(5)); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 1305); - assert_eq!(chunk.len(), 25); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 12); - assert_eq!(node_counts.hash, 1); - assert_eq!(node_counts.sum(), 12 + 1); - - // get up to fifth chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(1589)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, None); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 1589); - assert_eq!(chunk.len(), 29); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 15); - assert_eq!(node_counts.hash, 0); - assert_eq!(node_counts.sum(), 15); - - // limit larger than total chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(usize::MAX)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026)); - assert_eq!(chunk_result.next_index, None); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 1589); - assert_eq!(chunk.len(), 29); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 15); - assert_eq!(node_counts.hash, 0); - assert_eq!(node_counts.sum(), 15); - } - - #[test] - fn test_multi_chunk_with_no_limit_trunk() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // we generate the chunk starting from index 2, this has no hash nodes - // so no multi chunk will be generated - let chunk_result = chunk_producer - .multi_chunk_with_limit(1, None) - .expect("should generate chunk with limit"); - - assert_eq!(chunk_result.remaining_limit, None); - assert_eq!(chunk_result.next_index, None); - - // should only contain 2 items, the starting chunk id and the entire tree - assert_eq!(chunk_result.chunk.len(), 2); - - // assert items - assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![])); - if let ChunkOp::Chunk(chunk) = &chunk_result.chunk[1] { - let tree = execute(chunk.clone().into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - 
assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - } else { - panic!("expected ChunkOp::Chunk"); - } - } - - #[test] - fn test_multi_chunk_with_no_limit_not_trunk() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // we generate the chunk starting from index 2, this has no hash nodes - // so no multi chunk will be generated - let chunk_result = chunk_producer - .multi_chunk_with_limit(2, None) - .expect("should generate chunk with limit"); - - assert_eq!(chunk_result.remaining_limit, None); - assert_eq!(chunk_result.next_index, None); - - // chunk 2 - 5 will be considered separate subtrees - // each will have an accompanying chunk id, so 8 elements total - assert_eq!(chunk_result.chunk.len(), 8); - - // assert the chunk id's - assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); - assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); - assert_eq!(chunk_result.chunk[4], ChunkOp::ChunkId(vec![RIGHT, LEFT])); - assert_eq!(chunk_result.chunk[6], ChunkOp::ChunkId(vec![RIGHT, RIGHT])); - - // assert the chunks - assert_eq!( - chunk_result.chunk[1], - ChunkOp::Chunk(chunk_producer.chunk(2).expect("should generate chunk")) - ); - assert_eq!( - chunk_result.chunk[3], - ChunkOp::Chunk(chunk_producer.chunk(3).expect("should generate chunk")) - ); - assert_eq!( - chunk_result.chunk[5], - ChunkOp::Chunk(chunk_producer.chunk(4).expect("should generate chunk")) - ); - assert_eq!( - chunk_result.chunk[7], - ChunkOp::Chunk(chunk_producer.chunk(5).expect("should generate chunk")) - ); - } - - #[test] - fn test_multi_chunk_with_limit() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // ensure that the remaining limit, next index and values given are correct - // if limit is smaller than first chunk, we should get an error - let chunk_result = chunk_producer.multi_chunk_with_limit(1, Some(5)); - assert!(matches!( - chunk_result, - Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) - )); - - // get chunk 2 - // data size of chunk 2 is exactly 317 - // chunk op encoding for chunk 2 = 321 - // hence limit of 317 will be insufficient - let chunk_result = chunk_producer.multi_chunk_with_limit(2, Some(317)); - assert!(matches!( - chunk_result, - Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) - )); - - // get chunk 2 and 3 - // chunk 2 chunk op = 331 - // chunk 3 chunk op = 321 - let chunk_result = chunk_producer - .multi_chunk_with_limit(2, Some(321 + 321 + 5)) - .expect("should generate chunk"); - assert_eq!(chunk_result.next_index, Some(4)); - assert_eq!(chunk_result.remaining_limit, Some(5)); - assert_eq!(chunk_result.chunk.len(), 4); - assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); - assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); - } -} diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 0eb3f8cd..56308a64 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -29,19 +29,14 @@ //! 
Merk
 
 pub mod chunks;
-
 pub(crate) mod defaults;
-
 pub mod options;
-
-mod chunks2;
 pub mod restore;
-mod restore2;
 
 use std::{
     cell::Cell,
     cmp::Ordering,
-    collections::{BTreeSet, LinkedList},
+    collections::{BTreeMap, BTreeSet, LinkedList},
     fmt,
 };
 
@@ -62,12 +57,21 @@ use crate::{
         defaults::{MAX_UPDATE_VALUE_BASED_ON_COSTS_TIMES, ROOT_KEY_KEY},
         options::MerkOptions,
     },
-    proofs::{encode_into, query::query_item::QueryItem, Op as ProofOp, Query},
+    proofs::{
+        chunk::{
+            chunk::{LEFT, RIGHT},
+            util::traversal_instruction_as_string,
+        },
+        encode_into,
+        query::query_item::QueryItem,
+        Op as ProofOp, Query,
+    },
     tree::{
         kv::{ValueDefinedCostType, KV},
         AuxMerkBatch, Commit, CryptoHash, Fetch, Link, MerkBatch, Op, RefWalker, Tree, Walker,
         NULL_HASH,
     },
+    verify_query,
     Error::{CostsError, EdError, StorageError},
     MerkType::{BaseMerk, LayeredMerk, StandaloneMerk},
     TreeFeatureType,
@@ -1284,6 +1288,126 @@ where
         Ok(()).wrap_with_cost(Default::default())
     }
 }
+
+    /// Verifies the correctness of a Merk tree: checks that hash values are
+    /// computed correctly, that heights are accurate, and that links are
+    /// consistent with the backing store.
+    // TODO: define the return types
+    pub fn verify(&self) -> (BTreeMap<String, CryptoHash>, BTreeMap<String, Vec<u8>>) {
+        let tree = self.tree.take();
+
+        let mut bad_link_map: BTreeMap<String, CryptoHash> = BTreeMap::new();
+        let mut parent_keys: BTreeMap<String, Vec<u8>> = BTreeMap::new();
+        let mut root_traversal_instruction = vec![];
+
+        // TODO: remove clone
+        self.verify_tree(
+            // TODO: handle unwrap
+            &tree.clone().unwrap(),
+            &mut root_traversal_instruction,
+            &mut bad_link_map,
+            &mut parent_keys,
+        );
+        self.tree.set(tree);
+
+        return (bad_link_map, parent_keys);
+    }
+
+    fn verify_tree(
+        &self,
+        tree: &Tree,
+        traversal_instruction: &mut Vec<bool>,
+        bad_link_map: &mut BTreeMap<String, CryptoHash>,
+        parent_keys: &mut BTreeMap<String, Vec<u8>>,
+    ) {
+        if let Some(link) = tree.link(LEFT) {
+            traversal_instruction.push(LEFT);
+            self.verify_link(
+                link,
+                tree.key(),
+                traversal_instruction,
+                bad_link_map,
+                parent_keys,
+            );
+            traversal_instruction.pop();
+        }
+
+        if let Some(link) = tree.link(RIGHT) {
+            traversal_instruction.push(RIGHT);
+            self.verify_link(
+                link,
+                tree.key(),
+                traversal_instruction,
+                bad_link_map,
+                parent_keys,
+            );
+            traversal_instruction.pop();
+        }
+    }
+
+    fn verify_link(
+        &self,
+        link: &Link,
+        parent_key: &[u8],
+        traversal_instruction: &mut Vec<bool>,
+        bad_link_map: &mut BTreeMap<String, CryptoHash>,
+        parent_keys: &mut BTreeMap<String, Vec<u8>>,
+    ) {
+        let (hash, key, sum) = match link {
+            Link::Reference { hash, key, sum, .. } => {
+                (hash.to_owned(), key.to_owned(), sum.to_owned())
+            }
+            Link::Modified {
+                tree,
+                child_heights,
+                ..
+ } => ( + tree.hash().unwrap(), + tree.key().to_vec(), + tree.sum().unwrap(), + ), + Link::Loaded { + hash, + child_heights, + sum, + tree, + } => (hash.to_owned(), tree.key().to_vec(), sum.to_owned()), + _ => todo!(), + }; + + let instruction_id = traversal_instruction_as_string(&traversal_instruction); + let node = Tree::get(&self.storage, key).unwrap(); + + if node.is_err() { + bad_link_map.insert(instruction_id.clone(), hash.clone()); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + let node = node.unwrap(); + if node.is_none() { + bad_link_map.insert(instruction_id.clone(), hash.clone()); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + let node = node.unwrap(); + if &node.hash().unwrap() != &hash { + bad_link_map.insert(instruction_id.clone(), hash.clone()); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + if node.sum().unwrap() != sum { + bad_link_map.insert(instruction_id.clone(), hash.clone()); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + // TODO: check child heights + // all checks passed, recurse + self.verify_tree(&node, traversal_instruction, bad_link_map, parent_keys); + } } fn fetch_node<'db>(db: &impl StorageContext<'db>, key: &[u8]) -> Result, Error> { diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index e1a1afd4..0b866cac 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -29,250 +29,260 @@ //! Provides `Restorer`, which can create a replica of a Merk instance by //! receiving chunk proofs. -#[cfg(feature = "full")] -use std::{iter::Peekable, u8}; +use std::collections::BTreeMap; -#[cfg(feature = "full")] use grovedb_storage::{Batch, StorageContext}; -#[cfg(feature = "full")] -use super::Merk; -#[cfg(feature = "full")] use crate::{ - error::Error, + merk, merk::MerkSource, proofs::{ - chunk::{verify_leaf, verify_trunk, MIN_TRUNK_HEIGHT}, - tree::{Child, Tree as ProofTree}, + chunk::{ + chunk::{LEFT, RIGHT}, + chunk_op::ChunkOp, + error::{ChunkError, ChunkError::InternalError}, + util::{ + string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, + }, + }, + tree::{execute, Child, Tree as ProofTree}, Node, Op, }, - tree::{combine_hash, value_hash, Link, RefWalker, Tree}, - CryptoHash, + tree::{RefWalker, Tree}, + CryptoHash, Error, Error::{CostsError, EdError, StorageError}, - TreeFeatureType::BasicMerk, + Link, Merk, + TreeFeatureType::{BasicMerk, SummedMerk}, }; -#[cfg(feature = "full")] -/// A `Restorer` handles decoding, verifying, and storing chunk proofs to -/// replicate an entire Merk tree. It expects the chunks to be processed in -/// order, retrying the last chunk if verification fails. +/// Restorer handles verification of chunks and replication of Merk trees. +/// Chunks can be processed randomly as long as their parent has been processed +/// already. pub struct Restorer { - leaf_hashes: Option>>, - parent_keys: Option>>>, - trunk_height: Option, merk: Merk, - expected_root_hash: CryptoHash, - combining_value: Option>, + chunk_id_to_root_hash: BTreeMap, + // this is used to keep track of parents whose links need to be rewritten + parent_keys: BTreeMap>, } -#[cfg(feature = "full")] impl<'db, S: StorageContext<'db>> Restorer { - /// Creates a new `Restorer`, which will initialize a new Merk at the given - /// file path. 
The first chunk (the "trunk") will be compared against
-    /// `expected_root_hash`, then each subsequent chunk will be compared
-    /// against the hashes stored in the trunk, so that the restore process will
-    /// never allow malicious peers to send more than a single invalid chunk.
-    pub fn new(
-        merk: Merk<S>,
-        combining_value: Option<Vec<u8>>,
-        expected_root_hash: CryptoHash,
-    ) -> Self {
+    /// Initializes a new chunk restorer with the expected root hash for the
+    /// first chunk.
+    pub fn new(merk: Merk<S>, expected_root_hash: CryptoHash) -> Self {
+        let mut chunk_id_to_root_hash = BTreeMap::new();
+        chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash);
+
         Self {
-            expected_root_hash,
-            combining_value,
-            trunk_height: None,
             merk,
-            leaf_hashes: None,
-            parent_keys: None,
+            chunk_id_to_root_hash,
+            parent_keys: BTreeMap::new(),
         }
     }
 
-    /// Verifies a chunk and writes it to the working RocksDB instance. Expects
-    /// to be called for each chunk in order. Returns the number of remaining
-    /// chunks.
-    ///
-    /// Once there are no remaining chunks to be processed, `finalize` should
-    /// be called.
-    pub fn process_chunk(&mut self, ops: impl IntoIterator<Item = Op>) -> Result<usize, Error> {
-        match self.leaf_hashes {
-            None => self.process_trunk(ops),
-            Some(_) => self.process_leaf(ops),
-        }
-    }
-
-    /// Consumes the `Restorer` and returns the newly-created, fully-populated
-    /// Merk instance. This method will return an error if called before
-    /// processing all chunks (e.g. `restorer.remaining_chunks()` is not equal
-    /// to 0).
-    pub fn finalize(mut self) -> Result<Merk<S>, Error> {
-        if self.remaining_chunks().unwrap_or(0) != 0 {
-            return Err(Error::OldChunkRestoringError(
-                "Called finalize before all chunks were processed".to_string(),
-            ));
+    // TODO: consider converting chunk id to a vec
+    /// Processes the chunk at the given chunk id, returning the chunk ids of
+    /// the chunks that can be requested next
+    pub fn process_chunk(
+        &mut self,
+        chunk_id: String,
+        chunk: Vec<Op>,
+    ) -> Result<Vec<String>, Error> {
+        let expected_root_hash = self
+            .chunk_id_to_root_hash
+            .get(&chunk_id)
+            .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?;
+
+        let chunk_tree = Self::verify_chunk(chunk, expected_root_hash)?;
+
+        let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?;
+
+        if root_traversal_instruction.is_empty() {
+            self.merk.set_base_root_key(Some(chunk_tree.key().to_vec()));
+        } else {
+            // every non-root chunk has an associated parent with a placeholder link;
+            // here we update the placeholder link to represent the true data
+            self.rewrite_parent_link(&chunk_id, &root_traversal_instruction, &chunk_tree)?;
         }
 
-        if self.trunk_height.unwrap() >= MIN_TRUNK_HEIGHT {
-            self.rewrite_trunk_child_heights()?;
+        // next up, we need to write the chunk and build the map again
+        let chunk_write_result = self.write_chunk(chunk_tree, &mut root_traversal_instruction);
+        if chunk_write_result.is_ok() {
+            // if we were able to successfully write the chunk, we can remove
+            // the chunk's expected root hash from our chunk id map
+            self.chunk_id_to_root_hash.remove(&chunk_id);
         }
 
-        self.merk.load_base_root().unwrap()?;
-
-        Ok(self.merk)
+        chunk_write_result
     }
 
-    /// Returns the number of remaining chunks to be processed. If called before
-    /// the first chunk is processed, this method will return `None` since we do
-    /// not yet have enough information to know about the number of chunks.
-    pub fn remaining_chunks(&self) -> Option<usize> {
-        self.leaf_hashes.as_ref().map(|lh| lh.len())
+    /// Processes a multi chunk (a space-optimized chunk proof that can
+    /// contain multiple singular chunks)
+    pub fn process_multi_chunk(&mut self, multi_chunk: Vec<ChunkOp>) -> Result<Vec<String>, Error> {
+        let mut expect_chunk_id = true;
+        let mut chunk_ids = vec![];
+        let mut current_chunk_id: String = "".to_string();
+
+        for chunk_op in multi_chunk {
+            if (matches!(chunk_op, ChunkOp::ChunkId(..)) && !expect_chunk_id)
+                || (matches!(chunk_op, ChunkOp::Chunk(..)) && expect_chunk_id)
+            {
+                return Err(Error::ChunkRestoringError(ChunkError::InvalidMultiChunk(
+                    "invalid multi chunk ordering",
+                )));
+            }
+            match chunk_op {
+                ChunkOp::ChunkId(instructions) => {
+                    current_chunk_id = traversal_instruction_as_string(&instructions);
+                }
+                ChunkOp::Chunk(chunk) => {
+                    // TODO: remove clone
+                    let next_chunk_ids = self.process_chunk(current_chunk_id.clone(), chunk)?;
+                    chunk_ids.extend(next_chunk_ids);
+                }
+            }
+            expect_chunk_id = !expect_chunk_id;
+        }
+        Ok(chunk_ids)
     }
 
-    /// Writes the data contained in `tree` (extracted from a verified chunk
-    /// proof) to the RocksDB.
-    fn write_chunk(&mut self, tree: ProofTree) -> Result<(), Error> {
-        let mut batch = self.merk.storage.new_batch();
-
-        tree.visit_refs(&mut |proof_node| {
-            if let Some((mut node, key)) = match &proof_node.node {
-                Node::KV(key, value) => Some((
-                    Tree::new(key.clone(), value.clone(), None, BasicMerk).unwrap(),
-                    key,
-                )),
-                Node::KVValueHash(key, value, value_hash) => Some((
-                    Tree::new_with_value_hash(key.clone(), value.clone(), *value_hash, BasicMerk)
-                        .unwrap(),
-                    key,
-                )),
-                Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => Some((
-                    Tree::new_with_value_hash(
-                        key.clone(),
-                        value.clone(),
-                        *value_hash,
-                        *feature_type,
-                    )
-                    .unwrap(),
-                    key,
-                )),
-                _ => None,
-            } {
-                // TODO: encode tree node without cloning key/value
-                *node.slot_mut(true) = proof_node.left.as_ref().map(Child::as_link);
-                *node.slot_mut(false) = proof_node.right.as_ref().map(Child::as_link);
-
-                let bytes = node.encode();
-                batch.put(key, &bytes, None, None).map_err(CostsError)
-            } else {
+    /// Verifies the structure of a chunk and ensures the chunk matches the
+    /// expected root hash
+    fn verify_chunk(chunk: Vec<Op>, expected_root_hash: &CryptoHash) -> Result<ProofTree, Error> {
+        let chunk_len = chunk.len();
+        let mut kv_count = 0;
+        let mut hash_count = 0;
+
+        // build the tree from the ops, ensuring it is made up only of
+        // KVValueHashFeatureType and Hash nodes, and count each kind
+        let tree = execute(chunk.clone().into_iter().map(Ok), false, |node| {
+            if matches!(node, Node::KVValueHashFeatureType(..)) {
+                kv_count += 1;
+                Ok(())
+            } else if matches!(node, Node::Hash(..)) {
+                hash_count += 1;
                 Ok(())
+            } else {
+                Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof(
+                    "expected chunk proof to contain only kvvaluehashfeaturetype or hash nodes",
+                )))
             }
-        })?;
+        })
+        .unwrap()?;
 
-        self.merk
-            .storage
-            .commit_batch(batch)
-            .unwrap()
-            .map_err(StorageError)
-    }
-
-    /// Verifies the trunk then writes its data to the RocksDB.
- fn process_trunk(&mut self, ops: impl IntoIterator) -> Result { - let (trunk, height) = verify_trunk(ops.into_iter().map(Ok)).unwrap()?; + // chunk len must be exactly equal to the kv_count + hash_count + + // parent_branch_count + child_branch_count + debug_assert_eq!(chunk_len, ((kv_count + hash_count) * 2) - 1); - let root_hash = if self.combining_value.is_none() { - trunk.hash().unwrap() - } else { - combine_hash( - value_hash(self.combining_value.as_ref().expect("confirmed exists")).value(), - &trunk.hash().unwrap(), - ) - .value - }; - - if root_hash != self.expected_root_hash { - return Err(Error::OldChunkRestoringError(format!( - "Proof did not match expected hash\n\tExpected: {:?}\n\tActual: {:?}", - self.expected_root_hash, - trunk.hash() + // chunk structure verified, next verify root hash + if &tree.hash().unwrap() != expected_root_hash { + return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "chunk doesn't match expected root hash", ))); } - let root_key = trunk.key().to_vec(); - - let trunk_height = height / 2; - self.trunk_height = Some(trunk_height); - - let chunks_remaining = if trunk_height >= MIN_TRUNK_HEIGHT { - let leaf_hashes = trunk - .layer(trunk_height) - .map(|node| node.hash().unwrap()) - .collect::>() - .into_iter() - .peekable(); - self.leaf_hashes = Some(leaf_hashes); - - let parent_keys = trunk - .layer(trunk_height - 1) - .map(|node| node.key().to_vec()) - .collect::>>() - .into_iter() - .peekable(); - self.parent_keys = Some(parent_keys); - assert_eq!( - self.parent_keys.as_ref().unwrap().len(), - self.leaf_hashes.as_ref().unwrap().len() / 2 - ); - - let chunks_remaining = (2_usize).pow(trunk_height as u32); - assert_eq!(self.remaining_chunks_unchecked(), chunks_remaining); - chunks_remaining - } else { - self.leaf_hashes = Some(vec![].into_iter().peekable()); - self.parent_keys = Some(vec![].into_iter().peekable()); - 0 - }; - - // note that these writes don't happen atomically, which is fine here - // because if anything fails during the restore process we will just - // scrap the whole restore and start over - self.write_chunk(trunk)?; - self.merk.set_base_root_key(Some(root_key)).unwrap()?; - - Ok(chunks_remaining) + Ok(tree) } - /// Verifies a leaf chunk then writes it to the RocksDB. This needs to be - /// called in order, retrying the last chunk for any failed verifications. 
-    fn process_leaf(&mut self, ops: impl IntoIterator<Item = Op>) -> Result<usize, Error> {
-        let leaf_hashes = self.leaf_hashes.as_mut().unwrap();
-        let leaf_hash = leaf_hashes
-            .peek()
-            .expect("Received more chunks than expected");
-
-        let leaf = verify_leaf(ops.into_iter().map(Ok), *leaf_hash).unwrap()?;
-        self.rewrite_parent_link(&leaf)?;
-        self.write_chunk(leaf)?;
-
-        let leaf_hashes = self.leaf_hashes.as_mut().unwrap();
-        leaf_hashes.next();
+    /// Write the verified chunk to storage
+    fn write_chunk(
+        &mut self,
+        chunk_tree: ProofTree,
+        traversal_instruction: &mut Vec<bool>,
+    ) -> Result<Vec<String>, Error> {
+        // this contains all the elements we want to write to storage
+        let mut batch = self.merk.storage.new_batch();
+        let mut new_chunk_ids = Vec::new();
+
+        chunk_tree.visit_refs_track_traversal_and_parent(
+            traversal_instruction,
+            None,
+            &mut |proof_node, node_traversal_instruction, parent_key| {
+                match &proof_node.node {
+                    Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => {
+                        // build tree from node value
+                        let mut tree = Tree::new_with_value_hash(
+                            key.clone(),
+                            value.clone(),
+                            value_hash.clone(),
+                            *feature_type,
+                        )
+                        .unwrap();
+
+                        // update tree links
+                        *tree.slot_mut(LEFT) = proof_node.left.as_ref().map(Child::as_link);
+                        *tree.slot_mut(RIGHT) = proof_node.right.as_ref().map(Child::as_link);
+
+                        // encode the node and add it to the batch
+                        let bytes = tree.encode();
+
+                        batch.put(key, &bytes, None, None).map_err(CostsError)
+                    }
+                    Node::Hash(hash) => {
+                        // the node hash points to the root of another chunk;
+                        // we get the chunk id and add the hash to the restorer state
+                        let chunk_id = traversal_instruction_as_string(node_traversal_instruction);
+                        new_chunk_ids.push(chunk_id.clone());
+                        self.chunk_id_to_root_hash
+                            .insert(chunk_id.clone(), hash.clone());
+                        // TODO: handle unwrap
+                        self.parent_keys
+                            .insert(chunk_id, parent_key.unwrap().to_owned());
+                        Ok(())
+                    }
+                    _ => {
+                        // we do nothing for other node types;
+                        // verify_chunk runs before this, so this branch
+                        // should not be reached
+                        Ok(())
+                    }
+                }
+            },
+        )?;
+
+        // write the batch
+        self.merk
+            .storage
+            .commit_batch(batch)
+            .unwrap()
+            .map_err(StorageError)?;
 
-        Ok(self.remaining_chunks_unchecked())
+        Ok(new_chunk_ids)
     }
 
-    /// The parent of the root node of the leaf does not know the key of its
-    /// children when it is first written. Now that we have verified this leaf,
-    /// we can write the key into the parent node's entry. Note that this does
-    /// not need to recalcuate hashes since it already had the child hash.
-    fn rewrite_parent_link(&mut self, leaf: &ProofTree) -> Result<(), Error> {
-        let parent_keys = self.parent_keys.as_mut().unwrap();
-        let parent_key = parent_keys.peek().unwrap().clone();
-        let mut parent = crate::merk::fetch_node(&self.merk.storage, parent_key.as_slice())?
-            .expect("Could not find parent of leaf chunk");
-
-        let is_left_child = self.remaining_chunks_unchecked() % 2 == 0;
-        if let Some(Link::Reference { ref mut key, .. }) = parent.link_mut(is_left_child) {
-            *key = leaf.key().to_vec();
-        } else {
-            panic!("Expected parent links to be type Link::Reference");
-        };
+    /// When we process truncated chunks, the parents of `Node::Hash` nodes
+    /// hold invalid placeholder links.
+    /// When we get the actual chunk associated with a `Node::Hash`,
+    /// we need to update the parent link to reflect the correct data.
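+    ///
+    /// Rough illustration of the assumed flow: once the chunk with id
+    /// `chunk_id` has been verified, `parent_keys[chunk_id]` gives the key of
+    /// the parent node whose placeholder `Link::Reference` is then filled in
+    /// with the chunk root's key and sum and written back to storage.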
+    fn rewrite_parent_link(
+        &mut self,
+        chunk_id: &str,
+        traversal_instruction: &[bool],
+        chunk_tree: &ProofTree,
+    ) -> Result<(), Error> {
+        let parent_key = self
+            .parent_keys
+            .get(chunk_id)
+            .ok_or(Error::ChunkRestoringError(InternalError(
+                "after successful chunk verification parent key should exist",
+            )))?;
+
+        let mut parent = merk::fetch_node(&self.merk.storage, parent_key.as_slice())?.ok_or(
+            Error::ChunkRestoringError(InternalError(
+                "cannot find expected parent in memory, most likely state corruption issue",
+            )),
+        )?;
+
+        let is_left = traversal_instruction
+            .last()
+            .expect("rewrite is only called when traversal_instruction is not empty");
+
+        let updated_key = chunk_tree.key();
+        let updated_sum = chunk_tree.sum();
+
+        if let Some(Link::Reference { key, sum, .. }) = parent.link_mut(is_left.clone()) {
+            *key = updated_key.to_vec();
+            *sum = updated_sum;
+        }
 
         let parent_bytes = parent.encode();
         self.merk
@@ -281,56 +291,59 @@ impl<'db, S: StorageContext<'db>> Restorer<S> {
             .unwrap()
             .map_err(StorageError)?;
 
-        if !is_left_child {
-            let parent_keys = self.parent_keys.as_mut().unwrap();
-            parent_keys.next();
-        }
+        self.parent_keys
+            .remove(chunk_id)
+            .expect("confirmed parent key exists above");
 
         Ok(())
     }
 
-    fn rewrite_trunk_child_heights(&mut self) -> Result<(), Error> {
-        fn recurse<'s, 'db, S: StorageContext<'db>>(
-            mut node: RefWalker<MerkSource<'s, S>>,
-            remaining_depth: usize,
+    /// Each node's height is not stored in state, so a chunk producer could
+    /// lie about the height values; after replication we need to verify the
+    /// heights and, if they are invalid, recompute the correct values
+    fn rewrite_heights(&mut self) -> Result<(), Error> {
+        fn rewrite_child_heights<'s, 'db, S: StorageContext<'db>>(
+            mut walker: RefWalker<MerkSource<'s, S>>,
             batch: &mut <S as StorageContext<'db>>::Batch,
         ) -> Result<(u8, u8), Error> {
+            // TODO: remove unwrap
+            let mut cloned_node = Tree::decode(
+                walker.tree().key().to_vec(),
+                walker.tree().encode().as_slice(),
+            )
+            .unwrap();
 
-            if remaining_depth == 0 {
-                return Ok(node.tree().child_heights());
-            }
+            let mut left_height = 0;
+            let mut right_height = 0;
 
-            let mut cloned_node =
-                Tree::decode(node.tree().key().to_vec(), node.tree().encode().as_slice())
-                    .map_err(EdError)?;
+            if let Some(left_walker) = walker.walk(LEFT).unwrap()? {
+                let left_child_heights = rewrite_child_heights(left_walker, batch)?;
+                left_height = left_child_heights.0.max(left_child_heights.1) + 1;
+                *cloned_node.link_mut(LEFT).unwrap().child_heights_mut() = left_child_heights;
+            }
 
-            let left_child = node.walk(true).unwrap()?.unwrap();
-            let left_child_heights = recurse(left_child, remaining_depth - 1, batch)?;
-            let left_height = left_child_heights.0.max(left_child_heights.1) + 1;
-            *cloned_node.link_mut(true).unwrap().child_heights_mut() = left_child_heights;
+            if let Some(right_walker) = walker.walk(RIGHT).unwrap()?
{
+                let right_child_heights = rewrite_child_heights(right_walker, batch)?;
+                right_height = right_child_heights.0.max(right_child_heights.1) + 1;
+                *cloned_node.link_mut(RIGHT).unwrap().child_heights_mut() = right_child_heights;
+            }
 
             let bytes = cloned_node.encode();
             batch
-                .put(node.tree().key(), &bytes, None, None)
+                .put(walker.tree().key(), &bytes, None, None)
                 .map_err(CostsError)?;
 
-            Ok((left_height, right_height))
+            return Ok((left_height, right_height));
         }
 
-        self.merk.load_base_root().unwrap()?;
-
         let mut batch = self.merk.storage.new_batch();
 
-        let depth = self.trunk_height.unwrap();
-        self.merk.use_tree_mut(|maybe_tree| {
-            let tree = maybe_tree.unwrap();
-            let walker = RefWalker::new(tree, self.merk.source());
-            recurse(walker, depth, &mut batch)
-        })?;
+        // TODO: deal with unwrap
+        let mut tree = self.merk.tree.take().unwrap();
+        let walker = RefWalker::new(&mut tree, self.merk.source());
 
+        rewrite_child_heights(walker, &mut batch)?;
+
+        self.merk.tree.set(Some(tree));
 
         self.merk
            .storage
@@ -339,72 +352,239 @@ impl<'db, S: StorageContext<'db>> Restorer<S> {
             .map_err(StorageError)
     }
 
-    /// Returns the number of remaining chunks to be processed. This method will
-    /// panic if called before processing the first chunk (since that chunk
-    /// gives us the information to know how many chunks to expect).
-    pub fn remaining_chunks_unchecked(&self) -> usize {
-        self.leaf_hashes.as_ref().unwrap().len()
-    }
-}
+    /// Rebuild restoration state from partial storage state
+    fn attempt_state_recovery(&mut self) -> Result<(), Error> {
+        // TODO: think about the return type some more
+        let (bad_link_map, parent_keys) = self.merk.verify();
+        if !bad_link_map.is_empty() {
+            self.chunk_id_to_root_hash = bad_link_map;
+            self.parent_keys = parent_keys;
+        }
 
-#[cfg(feature = "full")]
-impl<'db, S: StorageContext<'db>> Merk<S> {
-    /// Creates a new `Restorer`, which can be used to verify chunk proofs to
-    /// replicate an entire Merk tree. A new Merk instance will be initialized
-    /// by creating a RocksDB at `path`.
-    pub fn restore(merk: Merk<S>, expected_root_hash: CryptoHash) -> Restorer<S> {
-        Restorer::new(merk, None, expected_root_hash)
+        Ok(())
    }
-}
 
-#[cfg(feature = "full")]
-impl ProofTree {
-    fn child_heights(&self) -> (u8, u8) {
-        (
-            self.left.as_ref().map_or(0, |c| c.tree.height as u8),
-            self.right.as_ref().map_or(0, |c| c.tree.height as u8),
-        )
+    /// Consumes the `Restorer` and returns a newly created, fully populated
+    /// Merk instance. This method will return an error if called before
+    /// processing all chunks.
+    pub fn finalize(mut self) -> Result<Merk<S>, Error> {
+        // ensure all chunks have been processed
+        if !self.chunk_id_to_root_hash.is_empty() || !self.parent_keys.is_empty() {
+            return Err(Error::ChunkRestoringError(
+                ChunkError::RestorationNotComplete,
+            ));
+        }
+
+        // get the latest version of the root node
+        self.merk.load_base_root().unwrap()?;
+
+        // if height values are wrong, rewrite heights
+        if self.verify_height().is_err() {
+            self.rewrite_heights()?;
+            // update the root node after height rewrite
+            self.merk.load_base_root().unwrap()?;
+        }
+
+        if !self.merk.verify().0.is_empty() {
+            return Err(Error::ChunkRestoringError(ChunkError::InternalError(
+                "restored tree invalid",
+            )));
+        }
+
+        Ok(self.merk)
     }
-}
 
-#[cfg(feature = "full")]
-impl Child {
-    fn as_link(&self) -> Link {
-        let key = match &self.tree.node {
-            Node::KV(key, _)
-            | Node::KVValueHash(key, ..)
-            | Node::KVValueHashFeatureType(key, ..)
=> key.as_slice(),
-            // for the connection between the trunk and leaf chunks, we don't
-            // have the child key so we must first write in an empty one. once
-            // the leaf gets verified, we can write in this key to its parent
-            _ => &[],
+    /// Verify that the child heights in the merk tree links correctly
+    /// represent the tree
+    fn verify_height(&self) -> Result<(), Error> {
+        let tree = self.merk.tree.take();
+        let height_verification_result = if let Some(tree) = &tree {
+            self.verify_tree_height(tree, tree.height())
+        } else {
+            Ok(())
         };
+        self.merk.tree.set(tree);
+        height_verification_result
+    }
+
+    fn verify_tree_height(&self, tree: &Tree, parent_height: u8) -> Result<(), Error> {
+        let (left_height, right_height) = tree.child_heights();
 
-        Link::Reference {
-            hash: self.hash,
-            sum: None,
-            child_heights: self.tree.child_heights(),
-            key: key.to_vec(),
+        if (left_height.abs_diff(right_height)) > 1 {
+            return Err(Error::CorruptedState(
+                "invalid child heights, difference greater than 1 for AVL tree",
+            ));
+        }
+
+        let max_child_height = left_height.max(right_height);
+        if parent_height <= max_child_height || parent_height - max_child_height != 1 {
+            return Err(Error::CorruptedState(
+                "invalid child heights, parent height is not 1 more than max child height",
+            ));
+        }
+
+        let left_link = tree.link(LEFT);
+        let right_link = tree.link(RIGHT);
+
+        if (left_height == 0 && left_link.is_some()) || (right_height == 0 && right_link.is_some())
+        {
+            return Err(Error::CorruptedState(
+                "invalid child heights, node has child height 0 but has a child",
+            ));
         }
+
+        if let Some(link) = left_link {
+            let left_tree = link.tree();
+            if left_tree.is_none() {
+                let left_tree = Tree::get(&self.merk.storage, link.key().to_vec())
+                    .unwrap()?
+                    .ok_or(Error::CorruptedState("link points to non-existent node"))?;
+                self.verify_tree_height(&left_tree, left_height)?;
+            } else {
+                self.verify_tree_height(left_tree.unwrap(), left_height)?;
+            }
+        }
+
+        if let Some(link) = right_link {
+            let right_tree = link.tree();
+            if right_tree.is_none() {
+                let right_tree = Tree::get(&self.merk.storage, link.key().to_vec())
+                    .unwrap()?
+                    .ok_or(Error::CorruptedState("link points to non-existent node"))?;
+                self.verify_tree_height(&right_tree, right_height)?;
+            } else {
+                self.verify_tree_height(right_tree.unwrap(), right_height)?;
+            }
+        }
+
+        Ok(())
     }
 }
 
-#[cfg(feature = "full")]
 #[cfg(test)]
 mod tests {
     use grovedb_path::SubtreePath;
     use grovedb_storage::{
-        rocksdb_storage::{test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext},
+        rocksdb_storage::{
+            test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext,
+            PrefixedRocksDbStorageContext,
+        },
         RawIterator, Storage,
     };
 
     use super::*;
-    use crate::{test_utils::*, tree::Op, MerkBatch};
+    use crate::{
+        execute_proof,
+        merk::chunks::ChunkProducer,
+        proofs::{
+            chunk::{
+                chunk::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk,
+                error::ChunkError::InvalidChunkProof,
+            },
+            Query,
+        },
+        test_utils::{make_batch_seq, TempMerk},
+        Error::ChunkRestoringError,
+        KVIterator, Merk, PanicSource,
+    };
+
+    #[test]
+    fn test_chunk_verification_non_avl_tree() {
+        let non_avl_tree_proof = vec![
+            Op::Push(Node::KV(vec![1], vec![1])),
+            Op::Push(Node::KV(vec![2], vec![2])),
+            Op::Parent,
+            Op::Push(Node::KV(vec![3], vec![3])),
+            Op::Parent,
+        ];
+        assert!(Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(
+            non_avl_tree_proof,
+            &[0; 32]
+        )
+        .is_err());
+    }
+
+    #[test]
+    fn test_chunk_verification_only_kv_feature_and_hash() {
+        // should not accept kv
+        let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))];
+        let verification_result =
+            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
+        assert!(matches!(
+            verification_result,
+            Err(ChunkRestoringError(InvalidChunkProof(
+                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
+            )))
+        ));
+
+        // should not accept kvhash
+        let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))];
+        let verification_result =
+            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
+        assert!(matches!(
+            verification_result,
+            Err(ChunkRestoringError(InvalidChunkProof(
+                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
+            )))
+        ));
+
+        // should not accept kvdigest
+        let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))];
+        let verification_result =
+            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
+        assert!(matches!(
+            verification_result,
+            Err(ChunkRestoringError(InvalidChunkProof(
+                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
+            )))
+        ));
+
+        // should not accept kvvaluehash
+        let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))];
+        let verification_result =
+            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
+        assert!(matches!(
+            verification_result,
+            Err(ChunkRestoringError(InvalidChunkProof(
+                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
+            )))
+        ));
+
+        // should not accept kvrefvaluehash
+        let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))];
+        let verification_result =
+            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
+        assert!(matches!(
+            verification_result,
+            Err(ChunkRestoringError(InvalidChunkProof(
+                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
+            )))
+        ));
+    }
+
+    fn get_node_hash(node: Node) -> Result<CryptoHash, String> {
+        match node {
+            Node::Hash(hash) => Ok(hash),
+            _ => Err("expected node hash".to_string()),
+        }
+    }
+
+    #[test]
+    fn test_process_chunk_correct_chunk_id_map() {
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..15);
+        merk.apply::<_, Vec<_>>(&batch, &[],
None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut merk_tree = merk.tree.take().expect("should have inner tree"); + merk.tree.set(Some(merk_tree.clone())); + let mut tree_walker = RefWalker::new(&mut merk_tree, PanicSource {}); - fn restore_test(batches: &[&MerkBatch>], expected_nodes: usize) { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut original = Merk::open_base( + let mut restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -412,77 +592,136 @@ mod tests { ) .unwrap() .unwrap(); - for batch in batches { - original - .apply::, Vec<_>>(batch, &[], None) - .unwrap() - .unwrap(); - } - let chunks = original.chunks_old().unwrap(); + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - let storage = TempStorage::default(); - let _tx2 = storage.start_transaction(); - let ctx = storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(); - let merk = Merk::open_base(ctx, false).unwrap().unwrap(); - let mut restorer = Merk::restore(merk, original.root_hash().unwrap()); - - assert_eq!(restorer.remaining_chunks(), None); - - let mut expected_remaining = chunks.len(); - for chunk in chunks { - let remaining = restorer.process_chunk(chunk.unwrap()).unwrap(); + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); - expected_remaining -= 1; - assert_eq!(remaining, expected_remaining); - assert_eq!(restorer.remaining_chunks().unwrap(), expected_remaining); - } - assert_eq!(expected_remaining, 0); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - let restored = restorer.finalize().unwrap(); - assert_eq!(restored.root_hash(), original.root_hash()); - assert_raw_db_entries_eq(&restored, &original, expected_nodes); - } + // initial restorer state should contain just the root hash of the source merk + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); - #[test] - fn restore_10000() { - restore_test(&[&make_batch_seq(0..10_000)], 10_000); - } + // generate first chunk + let (chunk, _) = chunk_producer.chunk_with_index(1).unwrap(); + // apply first chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk successfully"); + assert_eq!(new_chunk_ids.len(), 4); + + // after first chunk application + // the chunk_map should contain 4 items + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + // assert all the chunk hash values + assert_eq!( + restorer.chunk_id_to_root_hash.get("11"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("10"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("01"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("00"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])).unwrap()) + .as_ref() + ); - #[test] - fn restore_3() { 
-        restore_test(&[&make_batch_seq(0..3)], 3);
-    }
+        // generate second chunk
+        let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap();
+        // apply second chunk
+        let new_chunk_ids = restorer
+            .process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk)
+            .unwrap();
+        assert_eq!(new_chunk_ids.len(), 0);
+        // chunk_map should have 1 less element
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 3);
+        assert_eq!(restorer.chunk_id_to_root_hash.get("11"), None);
+
+        // let's try to apply the second chunk again, should not work
+        let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap();
+        // try to apply it a second time
+        let chunk_process_result =
+            restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk);
+        assert!(chunk_process_result.is_err());
+        assert!(matches!(
+            chunk_process_result,
+            Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))
+        ));
+
+        // next let's get a random but expected chunk and work with that e.g. chunk 4
+        // but let's apply it to the wrong place
+        let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap();
+        let chunk_process_result =
+            restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk);
+        assert!(chunk_process_result.is_err());
+        assert!(matches!(
+            chunk_process_result,
+            Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof(
+                ..
+            )))
+        ));
+
+        // correctly apply chunk 5
+        let (chunk, _) = chunk_producer.chunk_with_index(5).unwrap();
+        // apply chunk 5
+        let new_chunk_ids = restorer
+            .process_chunk(traversal_instruction_as_string(&vec![RIGHT, RIGHT]), chunk)
+            .unwrap();
+        assert_eq!(new_chunk_ids.len(), 0);
+        // chunk_map should have 1 less element
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 2);
+        assert_eq!(restorer.chunk_id_to_root_hash.get("00"), None);
+
+        // correctly apply chunk 3
+        let (chunk, _) = chunk_producer.chunk_with_index(3).unwrap();
+        // apply chunk 3
+        let new_chunk_ids = restorer
+            .process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk)
+            .unwrap();
+        assert_eq!(new_chunk_ids.len(), 0);
+        // chunk_map should have 1 less element
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 1);
+        assert_eq!(restorer.chunk_id_to_root_hash.get("10"), None);
+
+        // correctly apply chunk 4
+        let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap();
+        // apply chunk 4
+        let new_chunk_ids = restorer
+            .process_chunk(traversal_instruction_as_string(&vec![RIGHT, LEFT]), chunk)
+            .unwrap();
+        assert_eq!(new_chunk_ids.len(), 0);
+        // chunk_map should have 1 less element
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 0);
+        assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None);
 
-    #[test]
-    fn restore_2_left_heavy() {
-        restore_test(
-            &[
-                &[(vec![0], Op::Put(vec![], BasicMerk))],
-                &[(vec![1], Op::Put(vec![], BasicMerk))],
-            ],
-            2,
-        );
-    }
+        // finalize merk
+        let restored_merk = restorer.finalize().expect("should finalize successfully");
 
-    #[test]
-    fn restore_2_right_heavy() {
-        restore_test(
-            &[
-                &[(vec![1], Op::Put(vec![], BasicMerk))],
-                &[(vec![0], Op::Put(vec![], BasicMerk))],
-            ],
-            2,
+        assert_eq!(
+            restored_merk.root_hash().unwrap(),
+            merk.root_hash().unwrap()
         );
     }
 
-    #[test]
-    fn restore_1() {
-        restore_test(&[&make_batch_seq(0..1)], 1);
-    }
-
     fn assert_raw_db_entries_eq(
         restored: &Merk<PrefixedRocksDbImmediateStorageContext>,
         original: &Merk<PrefixedRocksDbImmediateStorageContext>,
@@ -497,7 +736,10 @@ mod tests {
 
         let mut i = 0;
         loop {
-            assert_eq!(restored_entries.valid(), original_entries.valid());
+            assert_eq!(
+                restored_entries.valid().unwrap(),
original_entries.valid().unwrap() + ); if !restored_entries.valid().unwrap() { break; } @@ -513,4 +755,466 @@ mod tests { assert_eq!(i, length); } + + // Builds a source merk with batch_size number of elements + // attempts restoration on some empty merk + // verifies that restoration was performed correctly. + fn test_restoration_single_chunk_strategy(batch_size: u64) { + // build the source merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut source_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let (chunk, next_chunk_id) = chunk_producer + .chunk(chunk_id.as_str()) + .expect("should get chunk"); + restorer + .process_chunk(chunk_id.to_string(), chunk) + .expect("should process chunk successfully"); + chunk_id_opt = next_chunk_id; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + source_merk.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + + assert_raw_db_entries_eq(&restored_merk, &source_merk, batch_size as usize); + } + + #[test] + fn restore_single_chunk_20() { + test_restoration_single_chunk_strategy(20); + } + + #[test] + fn restore_single_chunk_1000() { + test_restoration_single_chunk_strategy(1000); + } + + #[test] + fn test_process_multi_chunk_no_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + 
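+        // Editorial sketch (not part of the original patch): a multi chunk is
+        // a flat sequence of ChunkOp values pairing each chunk with its id,
+        // e.g. [ChunkOp::ChunkId(vec![]), ChunkOp::Chunk(vec![/* tree ops */])]
+        // for the root chunk, which is why the no-limit multi chunk produced
+        // below has length 2.
+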
assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // generate multi chunk from root with no limit + let chunk = chunk_producer + .multi_chunk_with_limit("", None) + .expect("should generate multichunk"); + + assert_eq!(chunk.chunk.len(), 2); + assert_eq!(chunk.next_index, None); + assert_eq!(chunk.remaining_limit, None); + + let next_ids = restorer + .process_multi_chunk(chunk.chunk) + .expect("should process chunk"); + // should have replicated all chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_no_limit_but_non_root() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // generate multi chunk from the 2nd chunk with no limit + let multi_chunk = chunk_producer + .multi_chunk_with_limit_and_index(next_chunk_index.unwrap(), None) + .unwrap(); + // tree of height 4 has 5 chunks + // we have restored the first leaving 4 chunks + // each chunk has an extra chunk id, since they are disjoint + // hence the size of the multi chunk should be 8 + assert_eq!(multi_chunk.chunk.len(), 8); + let new_chunk_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_with_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = 
TempStorage::new();
+        let tx = storage.start_transaction();
+        let mut restoration_merk = Merk::open_base(
+            storage
+                .get_immediate_storage_context(SubtreePath::empty(), &tx)
+                .unwrap(),
+            false,
+        )
+        .unwrap()
+        .unwrap();
+
+        // restorer root hash should be empty
+        assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]);
+
+        // at the start both merks should have different root hash values
+        assert_ne!(
+            merk.root_hash().unwrap(),
+            restoration_merk.root_hash().unwrap()
+        );
+
+        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
+        let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap());
+
+        // build multi chunk with a limit of 600
+        let multi_chunk = chunk_producer
+            .multi_chunk_with_limit("", Some(600))
+            .unwrap();
+        // should only contain the first chunk
+        assert_eq!(multi_chunk.chunk.len(), 2);
+        // should point to chunk 2
+        assert_eq!(multi_chunk.next_index, Some("11".to_string()));
+        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
+        assert_eq!(next_ids.len(), 4);
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 4);
+        assert_eq!(restorer.parent_keys.len(), 4);
+
+        // subsequent chunks are of size 321
+        // with limit just above 642 should get 2 chunks (2 and 3)
+        // disjoint, so multi chunk len should be 4
+        let multi_chunk = chunk_producer
+            .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645))
+            .unwrap();
+        assert_eq!(multi_chunk.chunk.len(), 4);
+        assert_eq!(multi_chunk.next_index, Some("01".to_string()));
+        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
+        // chunks 2 and 3 are leaf chunks
+        assert_eq!(next_ids.len(), 0);
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 2);
+        assert_eq!(restorer.parent_keys.len(), 2);
+
+        // get the last 2 chunks
+        let multi_chunk = chunk_producer
+            .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645))
+            .unwrap();
+        assert_eq!(multi_chunk.chunk.len(), 4);
+        assert_eq!(multi_chunk.next_index, None);
+        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
+        // chunks 4 and 5 are leaf chunks
+        assert_eq!(next_ids.len(), 0);
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 0);
+        assert_eq!(restorer.parent_keys.len(), 0);
+
+        // finalize merk
+        let restored_merk = restorer.finalize().unwrap();
+
+        // compare root hash values
+        assert_eq!(
+            restored_merk.root_hash().unwrap(),
+            merk.root_hash().unwrap()
+        );
+    }
+
+    // Builds a source merk with batch_size number of elements
+    // attempts restoration on some empty merk, with multi chunks
+    // verifies that restoration was performed correctly.
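+    //
+    // Editorial note (not part of the original patch): the limit bounds the
+    // encoded size of the returned multi chunk, so a limit smaller than a
+    // single chunk's encoding can never make progress; that is why the
+    // Some(1) case below is expected to panic while Some(1200) succeeds.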
+    fn test_restoration_multi_chunk_strategy(batch_size: u64, limit: Option<usize>) {
+        // build the source merk
+        let mut source_merk = TempMerk::new();
+        let batch = make_batch_seq(0..batch_size);
+        source_merk
+            .apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+
+        // build the restoration merk
+        let storage = TempStorage::new();
+        let tx = storage.start_transaction();
+        let mut restoration_merk = Merk::open_base(
+            storage
+                .get_immediate_storage_context(SubtreePath::empty(), &tx)
+                .unwrap(),
+            false,
+        )
+        .unwrap()
+        .unwrap();
+
+        // at the start
+        // restoration merk should have empty root hash
+        // and source merk should have a different root hash
+        assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]);
+        assert_ne!(
+            source_merk.root_hash().unwrap(),
+            restoration_merk.root_hash().unwrap()
+        );
+
+        // instantiate chunk producer and restorer
+        let mut chunk_producer =
+            ChunkProducer::new(&source_merk).expect("should create chunk producer");
+        let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap());
+
+        // perform chunk production and processing
+        let mut chunk_id_opt = Some("".to_string());
+        while let Some(chunk_id) = chunk_id_opt {
+            let multi_chunk = chunk_producer
+                .multi_chunk_with_limit(chunk_id.as_str(), limit)
+                .expect("should get chunk");
+            restorer
+                .process_multi_chunk(multi_chunk.chunk)
+                .expect("should process chunk successfully");
+            chunk_id_opt = multi_chunk.next_index;
+        }
+
+        // after chunk processing we should be able to finalize
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 0);
+        assert_eq!(restorer.parent_keys.len(), 0);
+        let restored_merk = restorer.finalize().expect("should finalize");
+
+        // compare root hash values
+        assert_eq!(
+            source_merk.root_hash().unwrap(),
+            restored_merk.root_hash().unwrap()
+        );
+    }
+
+    #[test]
+    fn restore_multi_chunk_20_no_limit() {
+        test_restoration_multi_chunk_strategy(20, None);
+    }
+
+    #[test]
+    #[should_panic]
+    fn restore_multi_chunk_20_tiny_limit() {
+        test_restoration_multi_chunk_strategy(20, Some(1));
+    }
+
+    #[test]
+    fn restore_multi_chunk_20_limit() {
+        test_restoration_multi_chunk_strategy(20, Some(1200));
+    }
+
+    #[test]
+    fn restore_multi_chunk_10000_limit() {
+        test_restoration_multi_chunk_strategy(10000, Some(1200));
+    }
+
+    #[test]
+    fn test_restoration_interruption() {
+        let mut merk = TempMerk::new();
+        let batch = make_batch_seq(0..15);
+        merk.apply::<_, Vec<_>>(&batch, &[], None)
+            .unwrap()
+            .expect("apply failed");
+        assert_eq!(merk.height(), Some(4));
+
+        let storage = TempStorage::new();
+        let tx = storage.start_transaction();
+        let mut restoration_merk = Merk::open_base(
+            storage
+                .get_immediate_storage_context(SubtreePath::empty(), &tx)
+                .unwrap(),
+            false,
+        )
+        .unwrap()
+        .unwrap();
+
+        // restorer root hash should be empty
+        assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]);
+
+        // at the start both merks should have different root hash values
+        assert_ne!(
+            merk.root_hash().unwrap(),
+            restoration_merk.root_hash().unwrap()
+        );
+
+        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
+        let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap());
+
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 1);
+        assert_eq!(
+            restorer.chunk_id_to_root_hash.get(""),
+            Some(merk.root_hash().unwrap()).as_ref()
+        );
+
+        // first restore the first chunk
+        let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap();
+        let new_chunk_ids = restorer
.process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // store old state for later reference + let old_chunk_id_to_root_hash = restorer.chunk_id_to_root_hash.clone(); + let old_parent_keys = restorer.parent_keys.clone(); + + // drop the restorer and the restoration merk + drop(restorer); + // open the restoration merk again and build a restorer from it + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + // assert the state of the restorer + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!(restorer.parent_keys.len(), 0); + + // recover state + let recovery_attempt = restorer.attempt_state_recovery(); + assert_eq!(recovery_attempt.is_ok(), true); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // assert equality to old state + assert_eq!(old_chunk_id_to_root_hash, restorer.chunk_id_to_root_hash); + assert_eq!(old_parent_keys, restorer.parent_keys); + } } diff --git a/merk/src/merk/restore2.rs b/merk/src/merk/restore2.rs deleted file mode 100644 index 084f3759..00000000 --- a/merk/src/merk/restore2.rs +++ /dev/null @@ -1,195 +0,0 @@ -// TODO: add license - -//! Provides `Restorer`, which can create a replica of a Merk instance by -//! receiving chunk proofs. - -use std::collections::BTreeMap; - -use grovedb_storage::{Batch, StorageContext}; - -use crate::{ - merk::MerkSource, - proofs::{ - chunk::{ - chunk_op::ChunkOp, - error::ChunkError, - util::{traversal_instruction_as_string, write_to_vec}, - }, - tree::{execute, Child, Tree as ProofTree}, - Node, - }, - tree::{RefWalker, Tree}, - CryptoHash, Error, - Error::{CostsError, EdError, StorageError}, - Link, Merk, - TreeFeatureType::BasicMerk, -}; - -// TODO: add documentation -pub struct Restorer { - merk: Merk, - chunk_id_to_root_hash: BTreeMap, -} - -impl<'db, S: StorageContext<'db>> Restorer { - // TODO: add documenation - pub fn new(merk: Merk, expected_root_hash: CryptoHash) -> Self { - let mut chunk_id_to_root_hash = BTreeMap::new(); - chunk_id_to_root_hash.insert(traversal_instruction_as_string(vec![]), expected_root_hash); - - Self { - merk, - chunk_id_to_root_hash, - } - } - - // TODO: add documentation - // what does the restorer process? - // it should be able to process single chunks, subtree chunks and multi chunks - // right? or just one of them? 
- // I think it should process just multi chunk at least for now - pub fn process_multi_chunk( - &mut self, - chunk: impl IntoIterator, - ) -> Result<(), Error> { - // chunk id, chunk - // we use the chunk id to know what to verify against - let mut chunks = chunk.into_iter(); - - // TODO: clean this up, make external function that peeks and asserts - let chunk_id_string = if let Some(ChunkOp::ChunkId(chunk_id)) = chunks.next() { - traversal_instruction_as_string(chunk_id) - } else { - return Err(Error::ChunkRestoringError(ChunkError::ExpectedChunkId)); - }; - - // TODO: deal with unwrap - let expected_root_hash = self.chunk_id_to_root_hash.get(&chunk_id_string).unwrap(); - dbg!(expected_root_hash); - - if let Some(ChunkOp::Chunk(chunk)) = chunks.next() { - // todo: deal with error - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .unwrap(); - debug_assert!(tree.hash().unwrap() == *expected_root_hash); - dbg!("yayy"); - self.write_chunk(tree); - } else { - return Err(Error::ChunkRestoringError(ChunkError::ExpectedChunk)); - } - - Ok(()) - } - - /// Writes the data contained in `tree` (extracted from a verified chunk - /// proof) to the RocksDB. - fn write_chunk(&mut self, tree: ProofTree) -> Result<(), Error> { - let mut batch = self.merk.storage.new_batch(); - - tree.visit_refs(&mut |proof_node| { - if let Some((mut node, key)) = match &proof_node.node { - Node::KV(key, value) => Some(( - Tree::new(key.clone(), value.clone(), None, BasicMerk).unwrap(), - key, - )), - Node::KVValueHash(key, value, value_hash) => Some(( - Tree::new_with_value_hash(key.clone(), value.clone(), *value_hash, BasicMerk) - .unwrap(), - key, - )), - Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => Some(( - Tree::new_with_value_hash( - key.clone(), - value.clone(), - *value_hash, - *feature_type, - ) - .unwrap(), - key, - )), - _ => None, - } { - // TODO: encode tree node without cloning key/value - // *node.slot_mut(true) = proof_node.left.as_ref().map(Child::as_link); - // *node.slot_mut(false) = proof_node.right.as_ref().map(Child::as_link); - - let bytes = node.encode(); - batch.put(key, &bytes, None, None).map_err(CostsError) - } else { - Ok(()) - } - })?; - - self.merk - .storage - .commit_batch(batch) - .unwrap() - .map_err(StorageError) - } -} - -#[cfg(test)] -mod tests { - use grovedb_path::SubtreePath; - use grovedb_storage::{rocksdb_storage::test_utils::TempStorage, Storage}; - - use super::*; - use crate::{merk::chunks2::ChunkProducer, test_utils::make_batch_seq, Merk}; - - #[test] - fn restoration_test() { - // Create source merk and populate - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut original = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - ) - .unwrap() - .unwrap(); - let batch = make_batch_seq(0..15); - original - .apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(original.height(), Some(4)); - - // Create to be restored merk - let storage = TempStorage::new(); - let tx2 = storage.start_transaction(); - let restored_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx2) - .unwrap(), - false, - ) - .unwrap() - .unwrap(); - assert_eq!(restored_merk.height(), None); - - // assert initial conditions - assert_ne!( - original.root_hash().unwrap(), - restored_merk.root_hash().unwrap() - ); - - // Perform Restoration - let mut chunk_producer = - 
ChunkProducer::new(&original).expect("should create chunk producer"); - - let mut restorer = Restorer::new(restored_merk, original.root_hash().unwrap()); - - let chunk = chunk_producer - .multi_chunk_with_limit(1, None) - .expect("should generate chunk"); - - assert_eq!(chunk.next_index, None); - assert_eq!(chunk.remaining_limit, None); - assert_eq!(chunk.chunk.len(), 2); - - restorer.process_multi_chunk(chunk.chunk).unwrap(); - } -} diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index d5ef376c..28114876 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -53,605 +53,8 @@ use crate::{ mod binary_range; #[cfg(feature = "full")] -// TODO: remove from here -pub mod chunk2; -#[cfg(feature = "full")] -// TODO: remove from here -pub mod util; -// TODO: remove from here -pub mod error; -// TODO: remove from here +pub mod chunk; pub mod chunk_op; - -/// The minimum number of layers the trunk will be guaranteed to have before -/// splitting into multiple chunks. If the tree's height is less than double -/// this value, the trunk should be verified as a leaf chunk. -#[cfg(feature = "full")] -pub const MIN_TRUNK_HEIGHT: usize = 5; - -#[cfg(feature = "full")] -impl<'a, S> RefWalker<'a, S> -where - S: Fetch + Sized + Clone, -{ - /// Generates a trunk proof by traversing the tree. - /// - /// Returns a tuple containing the produced proof, and a boolean indicating - /// whether or not there will be more chunks to follow. If the chunk - /// contains the entire tree, the boolean will be `false`, if the chunk - /// is abridged and will be connected to leaf chunks, it will be `true`. - pub fn create_trunk_proof(&mut self) -> CostResult<(Vec, bool), Error> { - let approx_size = 2usize.pow((self.tree().height() / 2) as u32) * 3; - let mut proof = Vec::with_capacity(approx_size); - - self.traverse_for_height_proof(&mut proof, 1) - .flat_map_ok(|trunk_height| { - if trunk_height < MIN_TRUNK_HEIGHT { - proof.clear(); - self.traverse_for_trunk(&mut proof, usize::MAX, true) - .map_ok(|_| Ok((proof, false))) - } else { - self.traverse_for_trunk(&mut proof, trunk_height, true) - .map_ok(|_| Ok((proof, true))) - } - }) - .flatten() - } - - /// Traverses down the left edge of the tree and pushes ops to the proof, to - /// act as a proof of the height of the tree. This is the first step in - /// generating a trunk proof. - fn traverse_for_height_proof( - &mut self, - proof: &mut Vec, - depth: usize, - ) -> CostResult { - let mut cost = OperationCost::default(); - let maybe_left = match self.walk(true).unwrap_add_cost(&mut cost) { - Ok(maybe_left) => maybe_left, - Err(e) => { - return Err(e).wrap_with_cost(cost); - } - }; - let has_left_child = maybe_left.is_some(); - - let trunk_height = if let Some(mut left) = maybe_left { - match left - .traverse_for_height_proof(proof, depth + 1) - .unwrap_add_cost(&mut cost) - { - Ok(x) => x, - Err(e) => return Err(e).wrap_with_cost(cost), - } - } else { - depth / 2 - }; - - if depth > trunk_height { - proof.push(Op::Push(self.to_kvhash_node())); - - if has_left_child { - proof.push(Op::Parent); - } - - if let Some(right) = self.tree().link(false) { - proof.push(Op::Push(Node::Hash(*right.hash()))); - proof.push(Op::Child); - } - } - - Ok(trunk_height).wrap_with_cost(cost) - } - - /// Traverses down the tree and adds KV push ops for all nodes up to a - /// certain depth. This expects the proof to contain a height proof as - /// generated by `traverse_for_height_proof`. 
- fn traverse_for_trunk( - &mut self, - proof: &mut Vec, - remaining_depth: usize, - is_leftmost: bool, - ) -> CostResult<(), Error> { - let mut cost = OperationCost::default(); - - if remaining_depth == 0 { - // return early if we have reached bottom of trunk - - // for leftmost node, we already have height proof - if is_leftmost { - return Ok(()).wrap_with_cost(cost); - } - - // add this node's hash - proof.push(Op::Push(self.to_hash_node().unwrap_add_cost(&mut cost))); - - return Ok(()).wrap_with_cost(cost); - } - - // traverse left - let has_left_child = self.tree().link(true).is_some(); - if has_left_child { - let mut left = cost_return_on_error!(&mut cost, self.walk(true)).unwrap(); - cost_return_on_error!( - &mut cost, - left.traverse_for_trunk(proof, remaining_depth - 1, is_leftmost) - ); - } - - // add this node's data - proof.push(Op::Push(self.to_kv_value_hash_feature_type_node())); - - if has_left_child { - proof.push(Op::Parent); - } - - // traverse right - if let Some(mut right) = cost_return_on_error!(&mut cost, self.walk(false)) { - cost_return_on_error!( - &mut cost, - right.traverse_for_trunk(proof, remaining_depth - 1, false) - ); - proof.push(Op::Child); - } - - Ok(()).wrap_with_cost(cost) - } -} - -/// Builds a chunk proof by iterating over values in a RocksDB, ending the chunk -/// when a node with key `end_key` is encountered. -/// -/// Advances the iterator for all nodes in the chunk and the `end_key` (if any). -#[cfg(feature = "full")] -pub(crate) fn get_next_chunk( - iter: &mut impl RawIterator, - end_key: Option<&[u8]>, -) -> CostResult, Error> { - let mut cost = OperationCost::default(); - - let mut chunk = Vec::with_capacity(512); - let mut stack = Vec::with_capacity(32); - let mut node = Tree::new(vec![], vec![], None, BasicMerk).unwrap_add_cost(&mut cost); - - while iter.valid().unwrap_add_cost(&mut cost) { - let key = iter.key().unwrap_add_cost(&mut cost).unwrap(); - - if let Some(end_key) = end_key { - if key == end_key { - break; - } - } - - let encoded_node = iter.value().unwrap_add_cost(&mut cost).unwrap(); - cost_return_on_error_no_add!( - &cost, - Tree::decode_into(&mut node, vec![], encoded_node).map_err(EdError) - ); - - // TODO: Only use the KVValueHash if needed, saves 32 bytes - // only needed when dealing with references and trees - let kv = Node::KVValueHashFeatureType( - key.to_vec(), - node.value_ref().to_vec(), - *node.value_hash(), - node.feature_type(), - ); - - chunk.push(Op::Push(kv)); - - if node.link(true).is_some() { - chunk.push(Op::Parent); - } - - if let Some(child) = node.link(false) { - stack.push(child.key().to_vec()); - } else { - while let Some(top_key) = stack.last() { - if key < top_key.as_slice() { - break; - } - stack.pop(); - chunk.push(Op::Child); - } - } - - iter.next().unwrap_add_cost(&mut cost); - } - - if iter.valid().unwrap_add_cost(&mut cost) { - iter.next().unwrap_add_cost(&mut cost); - } - - Ok(chunk).wrap_with_cost(cost) -} - -/// Verifies a leaf chunk proof by executing its operators. Checks that there -/// were no abridged nodes (Hash or KVHash) and the proof hashes to -/// `expected_hash`. -#[cfg(feature = "full")] -#[allow(dead_code)] // TODO: remove when proofs will be enabled -pub(crate) fn verify_leaf>>( - ops: I, - expected_hash: CryptoHash, -) -> CostResult { - execute(ops, false, |node| match node { - Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) 
=> Ok(()), - _ => Err(Error::OldChunkRestoringError( - "Leaf chunks must contain full subtree".to_string(), - )), - }) - .flat_map_ok(|tree| { - tree.hash().map(|hash| { - if hash != expected_hash { - Error::OldChunkRestoringError(format!( - "Leaf chunk proof did not match expected hash\n\tExpected: {:?}\n\tActual: \ - {:?}", - expected_hash, - tree.hash() - )); - } - Ok(tree) - }) - }) -} - -/// Verifies a trunk chunk proof by executing its operators. Ensures the -/// resulting tree contains a valid height proof, the trunk is the correct -/// height, and all of its inner nodes are not abridged. Returns the tree and -/// the height given by the height proof. -#[cfg(feature = "full")] -pub(crate) fn verify_trunk>>( - ops: I, -) -> CostResult<(ProofTree, usize), Error> { - let mut cost = OperationCost::default(); - - fn verify_height_proof(tree: &ProofTree) -> Result { - Ok(match tree.child(true) { - Some(child) => { - if let Node::Hash(_) = child.tree.node { - return Err(Error::OldChunkRestoringError( - "Expected height proof to only contain KV and KVHash nodes".to_string(), - )); - } - verify_height_proof(&child.tree)? + 1 - } - None => 1, - }) - } - - fn verify_completeness( - tree: &ProofTree, - remaining_depth: usize, - leftmost: bool, - ) -> Result<(), Error> { - let recurse = |left, leftmost| { - if let Some(child) = tree.child(left) { - verify_completeness(&child.tree, remaining_depth - 1, left && leftmost)?; - } - Ok(()) - }; - - if remaining_depth > 0 { - match tree.node { - Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) => {} - _ => { - return Err(Error::OldChunkRestoringError( - "Expected trunk inner nodes to contain keys and values".to_string(), - )) - } - } - recurse(true, leftmost)?; - recurse(false, false) - } else if !leftmost { - match tree.node { - Node::Hash(_) => Ok(()), - _ => Err(Error::OldChunkRestoringError( - "Expected trunk leaves to contain Hash nodes".to_string(), - )), - } - } else { - match &tree.node { - Node::KVHash(_) => Ok(()), - _ => Err(Error::OldChunkRestoringError( - "Expected leftmost trunk leaf to contain KVHash node".to_string(), - )), - } - } - } - - let mut kv_only = true; - let tree = cost_return_on_error!( - &mut cost, - execute(ops, false, |node| { - kv_only &= matches!(node, Node::KVValueHash(..)) - || matches!(node, Node::KV(..)) - || matches!(node, Node::KVValueHashFeatureType(..)); - Ok(()) - }) - ); - - let height = cost_return_on_error_no_add!(&cost, verify_height_proof(&tree)); - let trunk_height = height / 2; - - if trunk_height < MIN_TRUNK_HEIGHT { - if !kv_only { - return Err(Error::OldChunkRestoringError( - "Leaf chunks must contain full subtree".to_string(), - )) - .wrap_with_cost(cost); - } - } else { - cost_return_on_error_no_add!(&cost, verify_completeness(&tree, trunk_height, true)); - } - - Ok((tree, height)).wrap_with_cost(cost) -} - +pub mod error; #[cfg(feature = "full")] -#[cfg(test)] -mod tests { - use std::usize; - - use grovedb_costs::storage_cost::removal::StorageRemovedBytes::NoStorageRemoval; - use grovedb_storage::StorageContext; - - use super::{super::tree::Tree, *}; - use crate::{ - test_utils::*, - tree::{NoopCommit, PanicSource, Tree as BaseTree}, - }; - - #[derive(Default)] - struct NodeCounts { - hash: usize, - kv_hash: usize, - kv: usize, - kv_value_hash: usize, - kv_digest: usize, - kv_ref_value_hash: usize, - kv_value_hash_feature_type: usize, - } - - fn count_node_types(tree: Tree) -> NodeCounts { - let mut counts = NodeCounts::default(); - - tree.visit_nodes(&mut |node| { - match 
node { - Node::Hash(_) => counts.hash += 1, - Node::KVHash(_) => counts.kv_hash += 1, - Node::KV(..) => counts.kv += 1, - Node::KVValueHash(..) => counts.kv_value_hash += 1, - Node::KVDigest(..) => counts.kv_digest += 1, - Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, - Node::KVValueHashFeatureType(..) => counts.kv_value_hash_feature_type += 1, - }; - }); - - counts - } - - #[test] - fn small_trunk_roundtrip() { - let mut tree = make_tree_seq(31); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - // println!("{:?}", &proof); - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 32); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn big_trunk_roundtrip() { - let mut tree = make_tree_seq(2u64.pow(MIN_TRUNK_HEIGHT as u32 * 2 + 1) - 1); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(has_more); - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - - let counts = count_node_types(trunk); - // are these formulas correct for all values of `MIN_TRUNK_HEIGHT`? 🤔 - assert_eq!( - counts.hash, - 2usize.pow(MIN_TRUNK_HEIGHT as u32) + MIN_TRUNK_HEIGHT - 1 - ); - assert_eq!( - counts.kv_value_hash_feature_type, - 2usize.pow(MIN_TRUNK_HEIGHT as u32) - 1 - ); - assert_eq!(counts.kv_hash, MIN_TRUNK_HEIGHT + 1); - } - - #[test] - fn one_node_tree_trunk_roundtrip() { - let mut tree = BaseTree::new(vec![0], vec![], None, BasicMerk).unwrap(); - tree.commit( - &mut NoopCommit {}, - &|_, _| Ok(0), - &mut |_, _, _| Ok((false, None)), - &mut |_, _, _| Ok((NoStorageRemoval, NoStorageRemoval)), - ) - .unwrap() - .unwrap(); - - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 1); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn two_node_right_heavy_tree_trunk_roundtrip() { - // 0 - // \ - // 1 - let mut tree = BaseTree::new(vec![0], vec![], None, BasicMerk) - .unwrap() - .attach( - false, - Some(BaseTree::new(vec![1], vec![], None, BasicMerk).unwrap()), - ); - tree.commit( - &mut NoopCommit {}, - &|_, _| Ok(0), - &mut |_, _, _| Ok((false, None)), - &mut |_, _, _| Ok((NoStorageRemoval, NoStorageRemoval)), - ) - .unwrap() - .unwrap(); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 2); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn two_node_left_heavy_tree_trunk_roundtrip() { - // 1 - // / - // 0 - let mut tree = BaseTree::new(vec![1], vec![], None, BasicMerk) - .unwrap() - .attach( - true, - Some(BaseTree::new(vec![0], vec![], None, BasicMerk).unwrap()), - ); - tree.commit( - &mut NoopCommit {}, - &|_, _| Ok(0), - &mut |_, _, _| Ok((false, None)), - &mut |_, _, _| Ok((NoStorageRemoval, NoStorageRemoval)), - ) - 
.unwrap() - .unwrap(); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 2); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn three_node_tree_trunk_roundtrip() { - // 1 - // / \ - // 0 2 - let mut tree = BaseTree::new(vec![1], vec![], None, BasicMerk) - .unwrap() - .attach( - true, - Some(BaseTree::new(vec![0], vec![], None, BasicMerk).unwrap()), - ) - .attach( - false, - Some(BaseTree::new(vec![2], vec![], None, BasicMerk).unwrap()), - ); - tree.commit( - &mut NoopCommit {}, - &|_, _| Ok(0), - &mut |_, _, _| Ok((false, None)), - &mut |_, _, _| Ok((NoStorageRemoval, NoStorageRemoval)), - ) - .unwrap() - .unwrap(); - - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 3); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn leaf_chunk_roundtrip() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..31); - merk.apply::<_, Vec<_>>(batch.as_slice(), &[], None) - .unwrap() - .unwrap(); - - merk.commit(); - - let root_node = merk.tree.take(); - let root_key = root_node.as_ref().unwrap().key().to_vec(); - merk.tree.set(root_node); - - // whole tree as 1 leaf - let mut iter = merk.storage.raw_iter(); - iter.seek_to_first().unwrap(); - let chunk = get_next_chunk(&mut iter, None).unwrap().unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf(ops, merk.root_hash().unwrap()) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 31); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - drop(iter); - - let mut iter = merk.storage.raw_iter(); - iter.seek_to_first().unwrap(); - - // left leaf - let chunk = get_next_chunk(&mut iter, Some(root_key.as_slice())) - .unwrap() - .unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf( - ops, - [ - 78, 230, 25, 188, 163, 2, 169, 185, 254, 174, 196, 206, 162, 187, 245, 188, 74, 70, - 220, 160, 35, 78, 120, 122, 61, 90, 241, 105, 35, 180, 133, 98, - ], - ) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 15); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - - // right leaf - let chunk = get_next_chunk(&mut iter, None).unwrap().unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf( - ops, - [ - 21, 147, 223, 29, 106, 19, 23, 38, 233, 134, 245, 44, 246, 179, 48, 19, 111, 50, - 19, 191, 134, 37, 165, 5, 35, 111, 233, 213, 212, 5, 92, 45, - ], - ) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 15); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - } -} +pub mod util; diff --git a/merk/src/proofs/chunk/binary_range.rs b/merk/src/proofs/chunk/binary_range.rs index 350c9718..01a20531 100644 --- a/merk/src/proofs/chunk/binary_range.rs +++ b/merk/src/proofs/chunk/binary_range.rs @@ -1,3 +1,31 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// 
Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + const LEFT: bool = true; const RIGHT: bool = false; diff --git a/merk/src/proofs/chunk/chunk2.rs b/merk/src/proofs/chunk/chunk.rs similarity index 94% rename from merk/src/proofs/chunk/chunk2.rs rename to merk/src/proofs/chunk/chunk.rs index a6072871..95c686b7 100644 --- a/merk/src/proofs/chunk/chunk2.rs +++ b/merk/src/proofs/chunk/chunk.rs @@ -1,3 +1,31 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
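+//
+// Editorial sketch (not part of the original patch): chunks reuse the proof
+// op grammar, so a full three-node subtree re-executes roughly as
+//   Push(left), Push(root), Parent, Push(right), Child
+// while truncated positions are carried as Push(Hash(..)) placeholders that
+// later chunks fill in.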
+ use grovedb_costs::{cost_return_on_error, CostResult, CostsExt, OperationCost}; // TODO: add copyright comment @@ -170,7 +198,7 @@ pub mod tests { use crate::{ proofs::{ - chunk::chunk2::{verify_height_proof, LEFT, RIGHT}, + chunk::chunk::{verify_height_proof, LEFT, RIGHT}, tree::execute, Node, Op, Op::Parent, diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 29687932..9402d3d5 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -1,3 +1,31 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + use std::io::{Read, Write}; use ed::{Decode, Encode}; @@ -98,7 +126,7 @@ mod test { use crate::proofs::{ chunk::{ - chunk2::{LEFT, RIGHT}, + chunk::{LEFT, RIGHT}, chunk_op::ChunkOp, }, Node, Op, diff --git a/merk/src/proofs/chunk/error.rs b/merk/src/proofs/chunk/error.rs index 0c926203..bd482666 100644 --- a/merk/src/proofs/chunk/error.rs +++ b/merk/src/proofs/chunk/error.rs @@ -1,3 +1,31 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
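+//
+// Editorial note (not part of the original patch): these variants are
+// surfaced through `Error::ChunkingError` and `Error::ChunkRestoringError`,
+// which is the shape the restoration tests in this patch match against,
+// e.g. `Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))`.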
+
 #[derive(Debug, thiserror::Error)]
 /// Chunk related errors
 pub enum ChunkError {
@@ -25,6 +53,26 @@ pub enum ChunkError {
     #[error("expected chunk when parsing chunk op")]
     ExpectedChunk,
 
+    // Restoration Errors
+    /// Chunk restoration starts from the root chunk; this leads to a set of
+    /// root hash values used to verify subsequent chunks.
+    /// Hence, before you can verify a child chunk you need to have verified
+    /// its parent.
+    #[error("unexpected chunk: cannot verify chunk because verification hash is not in memory")]
+    UnexpectedChunk,
+
+    /// Invalid chunk proof when verifying chunk
+    #[error("invalid chunk proof: {0}")]
+    InvalidChunkProof(&'static str),
+
+    /// Invalid multi chunk
+    #[error("invalid multi chunk: {0}")]
+    InvalidMultiChunk(&'static str),
+
+    /// Finalize was called while the restorer still expects more chunks
+    #[error("called finalize too early, still expecting chunks")]
+    RestorationNotComplete,
+
     /// Internal error, this should never surface
     /// if it does, it means wrong assumption in code
     #[error("internal error {0}")]
diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs
index 1094e50f..3e430acf 100644
--- a/merk/src/proofs/chunk/util.rs
+++ b/merk/src/proofs/chunk/util.rs
@@ -1,33 +1,59 @@
-// TODO: add MIT License
-// TODO: add module description
+// MIT LICENSE
+//
+// Copyright (c) 2021 Dash Core Group
+//
+// Permission is hereby granted, free of charge, to any
+// person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the
+// Software without restriction, including without
+// limitation the rights to use, copy, modify, merge,
+// publish, distribute, sublicense, and/or sell copies of
+// the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice
+// shall be included in all copies or substantial portions
+// of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+
+//! Collection of state-independent algorithms needed to facilitate chunk
+//! production and restoration.
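For intuition about the helpers this module collects: the tree is sliced into layers of chunk height 3 or 2, a full chunk of height h exposes 2^h exit nodes, and the total chunk count is a layer-by-layer product-sum of those exits. A minimal free-standing sketch of that relationship (illustrative helpers only, not the functions defined in this file):

fn exit_nodes(chunk_height: usize) -> usize {
    // a complete chunk of height h hands control to 2^h child chunks
    1 << chunk_height
}

fn chunk_count(layer_heights: &[usize]) -> usize {
    // one root chunk; every later layer has one chunk per exit node above it
    let mut total = 0;
    let mut chunks_on_layer = 1;
    for h in layer_heights {
        total += chunks_on_layer;
        chunks_on_layer *= exit_nodes(*h);
    }
    total
}

fn main() {
    // height 10 decomposes into [3, 3, 2, 2]: 1 + 8 + 64 + 256 = 329 chunks
    assert_eq!(chunk_count(&[3, 3, 2, 2]), 329);
}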
 
 use std::io::Write;
 
 // TODO: figure out better nomenclature
 use crate::{proofs::chunk::binary_range::BinaryRange, Error};
-use crate::{proofs::chunk::error::ChunkError, Error::InternalError};
-
-// TODO: add documentation
-fn chunk_height_per_layer(height: usize) -> Vec<usize> {
-    // every chunk has a fixed height of 2
-    // it is possible for a chunk to not reach full capacity
-    let mut two_count = height / 2;
-    if height % 2 != 0 {
-        two_count += 1;
-    }
-
-    return vec![2; two_count];
-}
+use crate::{
+    proofs::chunk::{
+        chunk::{LEFT, RIGHT},
+        error::{ChunkError, ChunkError::BadTraversalInstruction},
+    },
+    Error::InternalError,
+};
 
 /// Represents the height as a linear combination of 3 and 2,
 /// of the form 3x + 2y.
 /// This breaks the tree into layers of height 3 or 2;
 /// the minimum chunk height is 2, so if the tree height is less than 2
 /// we just return a single layer of height 2.
-fn chunk_height_per_layer_lin_comb(height: usize) -> Vec<usize> {
+fn chunk_height_per_layer(height: usize) -> Vec<usize> {
     let mut two_count = 0;
     let mut three_count = height / 3;
 
+    if height == 0 {
+        return vec![];
+    }
+
     // minimum chunk height is 2, if tree height is less than 2
     // return a single layer with chunk height 2
     if height < 2 {
@@ -200,17 +226,142 @@ pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result<Vec<bool>, Error> {
     return Ok(instructions);
 }
 
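The inverse mapping below rests on one small trick: within a layer, the left/right path down to a chunk's root is read as a binary number (left = 0, right = 1) to get the chunk's offset among its layer siblings. A standalone sketch of just that step, using the same bool convention as this module (LEFT = true, RIGHT = false):

fn subchunk_offset(instructions: &[bool]) -> usize {
    instructions.iter().fold(0, |acc, &step| {
        // shift previous bits up, then append 0 for left and 1 for right
        (acc << 1) + (1 - step as usize)
    })
}

fn main() {
    assert_eq!(subchunk_offset(&[true, true, true]), 0); // [left, left, left]
    assert_eq!(subchunk_offset(&[true, true, false]), 1); // [left, left, right]
    assert_eq!(subchunk_offset(&[false, false, false]), 7); // [right, right, right]
}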
+/// Determine the chunk id given the traversal instruction and the max height
+/// of the tree
+pub fn chunk_id_from_traversal_instruction(
+    traversal_instruction: &[bool],
+    height: usize,
+) -> Result<usize, Error> {
+    // empty traversal instruction points to the first chunk
+    if traversal_instruction.is_empty() {
+        return Ok(1);
+    }
+
+    let mut chunk_count = number_of_chunks(height);
+    let mut current_chunk_id = 1;
+
+    let mut layer_heights = chunk_height_per_layer(height);
+    let last_layer_height = layer_heights.pop().expect("confirmed not empty");
+
+    // traversal instructions should only point to the root node of chunks (chunk
+    // boundaries); the layer heights represent the height of each chunk layer.
+    // the last chunk layer is at height = total_height - last_chunk_height + 1.
+    // traversal instructions require 1 less than height to address a node,
+    // e.g. height 1 is represented by [] - len of 0
+    //      height 2 is represented by [left] or [right] - len of 1
+    // therefore the last chunk root node is addressed with total_height -
+    // last_chunk_height
+    if traversal_instruction.len() > height - last_layer_height {
+        return Err(Error::ChunkingError(BadTraversalInstruction(
+            "traversal instruction should not address nodes past the root of the last layer chunks",
+        )));
+    }
+
+    // verify that the traversal instruction points to a chunk boundary
+    let mut traversal_length = traversal_instruction.len();
+    let mut relevant_layer_heights = vec![];
+    for layer_height in layer_heights {
+        // the traversal_length should be a perfect sum of a subset of the layer
+        // heights; if the traversal_length is not 0, it should be larger than or
+        // equal to the next layer height.
+        if traversal_length < layer_height {
+            return Err(Error::ChunkingError(BadTraversalInstruction(
+                "traversal instruction should point to a chunk boundary",
+            )));
+        }
+
+        traversal_length -= layer_height;
+        relevant_layer_heights.push(layer_height);
+
+        if traversal_length == 0 {
+            break;
+        }
+    }
+
+    // take layer_height instructions and determine the updated chunk id
+    let mut start_index = 0;
+    for layer_height in relevant_layer_heights {
+        let end_index = start_index + layer_height;
+        let subset_instructions = &traversal_instruction[start_index..end_index];
+
+        // the offset multiplier determines which subchunk we are on, based on the
+        // given instruction: it just converts the binary instruction to decimal,
+        // taking left as 0 and right as 1, i.e. [left, left, left] = 0
+        // means we are at subchunk 0
+        let mut offset_multiplier = 0;
+        for (i, instruction) in subset_instructions.iter().enumerate() {
+            offset_multiplier += 2_usize.pow((subset_instructions.len() - i - 1) as u32)
+                * (1 - *instruction as usize);
+        }
+
+        if chunk_count % 2 != 0 {
+            // remove the current chunk from the chunk count
+            chunk_count -= 1;
+        }
+
+        chunk_count /= exit_node_count(layer_height);
+
+        current_chunk_id = current_chunk_id + offset_multiplier * chunk_count + 1;
+
+        start_index = end_index;
+    }
+
+    Ok(current_chunk_id)
+}
+
+/// Determine the chunk id given the traversal instruction and the max height
+/// of the tree. This can recover from traversal instructions not pointing to
+/// a chunk boundary; in such a case, it backtracks until it hits a chunk
+/// boundary.
+pub fn chunk_id_from_traversal_instruction_with_recovery(
+    traversal_instruction: &[bool],
+    height: usize,
+) -> Result<usize, Error> {
+    let chunk_id_result = chunk_id_from_traversal_instruction(traversal_instruction, height);
+    if chunk_id_result.is_err() {
+        return chunk_id_from_traversal_instruction_with_recovery(
+            &traversal_instruction[0..traversal_instruction.len() - 1],
+            height,
+        );
+    }
+    chunk_id_result
+}
+
+/// Generate instruction for traversing to a given chunk in a binary tree,
+/// returns string representation
+pub fn generate_traversal_instruction_as_string(
+    height: usize,
+    chunk_id: usize,
+) -> Result<String, Error> {
+    let instruction = generate_traversal_instruction(height, chunk_id)?;
+    Ok(traversal_instruction_as_string(&instruction))
+}
+
 /// Convert traversal instruction to a string
-/// 1 represents left
-/// 0 represents right
-pub fn traversal_instruction_as_string(instruction: Vec<bool>) -> String {
+/// 1 represents left (true)
+/// 0 represents right (false)
pub fn traversal_instruction_as_string(instruction: &Vec<bool>) -> String {
     instruction
         .iter()
         .map(|v| if *v { "1" } else { "0" })
         .collect()
 }
 
-// TODO: move this to a better file
+/// Converts a string that represents a traversal instruction
+/// to a vec of bool, true = left and false = right
+pub fn string_as_traversal_instruction(instruction_string: &str) -> Result<Vec<bool>, Error> {
+    instruction_string
+        .chars()
+        .map(|char| match char {
+            '1' => Ok(LEFT),
+            '0' => Ok(RIGHT),
+            _ => Err(Error::ChunkingError(ChunkError::BadTraversalInstruction(
+                "failed to parse instruction string",
+            ))),
+        })
+        .collect()
+}
+
 pub fn write_to_vec<W: Write>(dest: &mut W, value: &[u8]) -> Result<(), Error> {
     dest.write_all(value)
         .map_err(|_e| InternalError("failed to write to vector"))
@@ -221,17 +372,17 @@ mod test {
     use byteorder::LE;
 
     use super::*;
-    use crate::proofs::chunk::chunk2::{LEFT, RIGHT};
+    use crate::proofs::chunk::chunk::{LEFT, RIGHT};
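The tests that follow pin down the 3x + 2y decomposition; as a hand-check, here is a compact re-derivation of the same split (favour 3s, patch the remainder with 2s). This mirrors the documented behaviour of chunk_height_per_layer but is not the patch's implementation:

fn layer_heights_sketch(height: usize) -> Vec<usize> {
    if height == 0 {
        return vec![];
    }
    if height < 2 {
        // pad up to the minimum chunk height
        return vec![2];
    }
    let mut threes = height / 3;
    let mut twos = 0;
    match height % 3 {
        1 => {
            // trade one 3 for two 2s: 3 + 1 = 2 + 2
            threes -= 1;
            twos = 2;
        }
        2 => twos = 1,
        _ => {}
    }
    let mut layers = vec![3; threes];
    layers.extend(vec![2; twos]);
    layers
}

fn main() {
    assert_eq!(layer_heights_sketch(10), vec![3, 3, 2, 2]);
    assert_eq!(layer_heights_sketch(45), vec![3; 15]);
    assert_eq!(layer_heights_sketch(5), vec![3, 2]);
}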
#[test]
    fn test_chunk_height_per_layer() {
        let layer_heights = chunk_height_per_layer(10);
        assert_eq!(layer_heights.iter().sum::<usize>(), 10);
-        assert_eq!(layer_heights, [2, 2, 2, 2, 2]);
+        assert_eq!(layer_heights, [3, 3, 2, 2]);

        let layer_heights = chunk_height_per_layer(45);
-        assert_eq!(layer_heights.iter().sum::<usize>(), 46);
-        assert_eq!(layer_heights, [2; 23]);
+        assert_eq!(layer_heights.iter().sum::<usize>(), 45);
+        assert_eq!(layer_heights, [3; 15]);

        let layer_heights = chunk_height_per_layer(2);
        assert_eq!(layer_heights.iter().sum::<usize>(), 2);
@@ -271,23 +422,20 @@ mod test {
        // hence total chunk count = 1 + 4 = 5
        assert_eq!(number_of_chunks(4), 5);

-        // tree with height 6 should have 21 chunks
-        // will be split into three layers of chunk height 2 = [2,2,2]
-        // first chunk takes 1, has 2^2 = 4 exit nodes
-        // second chunk takes 4 with each having 2^2 exit nodes
-        // total exit from second chunk = 4 * 4 = 16
-        // total chunks = 1 + 4 + 16 = 21
-        assert_eq!(number_of_chunks(6), 21);
+        // tree with height 6 should have 9 chunks
+        // will be split into two layers of chunk height 3 = [3,3]
+        // first chunk takes 1, has 2^3 = 8 exit nodes
+        // total chunks = 1 + 8 = 9
+        assert_eq!(number_of_chunks(6), 9);

        // tree with height 10 should have 329 chunks
-        // will be split into 5 layers = [2,2,2,2,2]
-        // first layer has just 1 chunk, exit nodes = 2^2 = 4
-        // second layer has 4 chunks, exit nodes = 2^2 * 4 = 16
-        // third layer has 16 chunks, exit nodes = 2^2 * 16 = 64
-        // fourth layer has 64 chunks, exit nodes = 2^2 * 64 = 256
-        // fifth layer has 256 chunks
-        // total chunks = 1 + 4 + 16 + 64 + 256 = 341 chunks
-        assert_eq!(number_of_chunks(10), 341);
+        // will be split into 4 layers = [3, 3, 2, 2]
+        // first layer has just 1 chunk, exit nodes = 2^3 = 8
+        // second layer has 8 chunks, exit nodes = 2^3 * 8 = 64
+        // third layer has 64 chunks, exit nodes = 2^2 * 64 = 256
+        // fourth layer has 256 chunks
+        // total chunks = 1 + 8 + 64 + 256 = 329 chunks
+        assert_eq!(number_of_chunks(10), 329);
    }

    #[test]
@@ -307,28 +455,26 @@ mod test {
        assert_eq!(number_of_chunks_under_chunk_id(4, 4).unwrap(), 1);
        assert_eq!(number_of_chunks_under_chunk_id(4, 5).unwrap(), 1);

-        // tree with height 10 should have 341 chunks
-        // layer_heights = [2, 2, 2, 2, 2]
-        // chunk_id 1 = 341
-        // chunk_id 2 = 85 i.e (341 - 1) / 2^2
-        // chunk_id 3 = 21 i.e (85 - 1) / 2^2
-        // chunk_id 4 = 5 i.e (21 - 1) / 2^2
-        // chunk_id 5 = 1 i.e (5 - 1) / 2^2
-        // chunk_id 6 = 1 on the same layer as 5
-        // chunk_id 87 = 85 as chunk 87 should wrap back to the same layer as chunk_id 2
-        // chunk_id 88 = mirrors chunk_id 3
-        // chunk_id 89 = mirrors chunk_id 4
-        // chunk_id 90 = mirrors chunk_id 5
-        assert_eq!(number_of_chunks_under_chunk_id(10, 1).unwrap(), 341);
-        assert_eq!(number_of_chunks_under_chunk_id(10, 2).unwrap(), 85);
-        assert_eq!(number_of_chunks_under_chunk_id(10, 3).unwrap(), 21);
-        assert_eq!(number_of_chunks_under_chunk_id(10, 4).unwrap(), 5);
+        // tree with height 10 should have 329 chunks
+        // layer_heights = [3, 3, 2, 2]
+        // chunk_id 1 = 329
+        // chunk_id 2 = 41 i.e. (329 - 1) / 2^3
+        // chunk_id 3 = 5 i.e. (41 - 1) / 2^3
+        // chunk_id 4 = 1 i.e. (5 - 1) / 2^2
+        // chunk_id 5 = 1 on the same layer as 4
+        // chunk_id 43 = 41 as chunk 43 should wrap back to the same layer as chunk_id 2
+        // chunk_id 44 = mirrors chunk_id 3
+        // chunk_id 45 = mirrors chunk_id 4
+        // chunk_id 46 = mirrors chunk_id 5
+        assert_eq!(number_of_chunks_under_chunk_id(10, 1).unwrap(), 329);
+        assert_eq!(number_of_chunks_under_chunk_id(10, 2).unwrap(), 41);
+        
assert_eq!(number_of_chunks_under_chunk_id(10, 3).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(10, 4).unwrap(), 1); assert_eq!(number_of_chunks_under_chunk_id(10, 5).unwrap(), 1); - assert_eq!(number_of_chunks_under_chunk_id(10, 6).unwrap(), 1); - assert_eq!(number_of_chunks_under_chunk_id(10, 87).unwrap(), 85); - assert_eq!(number_of_chunks_under_chunk_id(10, 88).unwrap(), 21); - assert_eq!(number_of_chunks_under_chunk_id(10, 89).unwrap(), 5); - assert_eq!(number_of_chunks_under_chunk_id(10, 90).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 43).unwrap(), 41); + assert_eq!(number_of_chunks_under_chunk_id(10, 44).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(10, 45).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 46).unwrap(), 1); } #[test] @@ -396,24 +542,26 @@ mod test { fn test_chunk_height() { // tree of height 6 // all chunks have the same height - // since layer height = [2,2,2] - // we have 21 chunks in a tree of this height - for i in 1..=21 { - assert_eq!(chunk_height(6, i).unwrap(), 2); + // since layer height = [3,3] + // we have 9 chunks in a tree of this height + for i in 1..=9 { + assert_eq!(chunk_height(6, i).unwrap(), 3); } // tree of height 5 - // layer_height = [2, 2] - // we also have 21 chunks here - for i in 1..=21 { + // layer_height = [3, 2] + // we have 9 chunks, just the first chunk is of height 3 + // the rest are of height 2 + assert_eq!(chunk_height(5, 1).unwrap(), 3); + for i in 2..=9 { assert_eq!(chunk_height(5, i).unwrap(), 2); } // tree of height 10 - // layer_height = [3, 3, 3, 3] + // layer_height = [3, 3, 2, 2] // just going to check chunk 1 - 5 - assert_eq!(chunk_height(10, 1).unwrap(), 2); - assert_eq!(chunk_height(10, 2).unwrap(), 2); + assert_eq!(chunk_height(10, 1).unwrap(), 3); + assert_eq!(chunk_height(10, 2).unwrap(), 3); assert_eq!(chunk_height(10, 3).unwrap(), 2); assert_eq!(chunk_height(10, 4).unwrap(), 2); assert_eq!(chunk_height(10, 5).unwrap(), 2); @@ -421,12 +569,133 @@ mod test { #[test] fn test_traversal_instruction_as_string() { - assert_eq!(traversal_instruction_as_string(vec![]), ""); - assert_eq!(traversal_instruction_as_string(vec![LEFT]), "1"); - assert_eq!(traversal_instruction_as_string(vec![RIGHT]), "0"); + assert_eq!(traversal_instruction_as_string(&vec![]), ""); + assert_eq!(traversal_instruction_as_string(&vec![LEFT]), "1"); + assert_eq!(traversal_instruction_as_string(&vec![RIGHT]), "0"); assert_eq!( - traversal_instruction_as_string(vec![RIGHT, LEFT, LEFT, RIGHT]), + traversal_instruction_as_string(&vec![RIGHT, LEFT, LEFT, RIGHT]), "0110" ); } + + #[test] + fn test_instruction_string_to_traversal_instruction() { + assert_eq!(string_as_traversal_instruction("1").unwrap(), vec![LEFT]); + assert_eq!(string_as_traversal_instruction("0").unwrap(), vec![RIGHT]); + assert_eq!( + string_as_traversal_instruction("001").unwrap(), + vec![RIGHT, RIGHT, LEFT] + ); + assert_eq!(string_as_traversal_instruction("002").is_err(), true); + assert_eq!(string_as_traversal_instruction("").unwrap(), vec![]); + } + + #[test] + fn test_chunk_id_from_traversal_instruction() { + // tree of height 4 + let traversal_instruction = generate_traversal_instruction(4, 1).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 1 + ); + let traversal_instruction = generate_traversal_instruction(4, 2).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 2 + ); + let traversal_instruction = 
generate_traversal_instruction(4, 3).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 3 + ); + let traversal_instruction = generate_traversal_instruction(4, 4).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 4 + ); + + // tree of height 6 + let traversal_instruction = generate_traversal_instruction(6, 1).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 1 + ); + let traversal_instruction = generate_traversal_instruction(6, 2).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 2 + ); + let traversal_instruction = generate_traversal_instruction(6, 3).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 3 + ); + let traversal_instruction = generate_traversal_instruction(6, 4).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 4 + ); + let traversal_instruction = generate_traversal_instruction(6, 5).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 5 + ); + let traversal_instruction = generate_traversal_instruction(6, 6).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 6 + ); + let traversal_instruction = generate_traversal_instruction(6, 7).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 7 + ); + let traversal_instruction = generate_traversal_instruction(6, 8).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 8 + ); + let traversal_instruction = generate_traversal_instruction(6, 9).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 9 + ); + } + + #[test] + fn test_chunk_id_from_traversal_instruction_with_recovery() { + // tree of height 5 + // layer heights = [3, 2] + // first chunk boundary is at instruction len 0 e.g. [] + // second chunk boundary is at instruction len 3 e.g. [left, left, left] + // anything outside of this should return an error with regular chunk_id + // function with recovery we expect this to backtrack to the last chunk + // boundary e.g. 
[left] should backtrack to [] + // [left, left, right, left] should backtrack to [left, left, right] + assert_eq!( + chunk_id_from_traversal_instruction(&[LEFT], 5).is_err(), + true + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT], 5).unwrap(), + 1 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT], 5).unwrap(), + 1 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT, RIGHT], 5).unwrap(), + 3 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT, RIGHT, LEFT], 5) + .unwrap(), + 3 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT; 50], 5).unwrap(), + 2 + ); + } } diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index 819fd43b..99e84827 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -43,6 +43,11 @@ use super::{Node, Op}; use crate::tree::{combine_hash, kv_digest_to_kv_hash, kv_hash, node_hash, value_hash, NULL_HASH}; #[cfg(any(feature = "full", feature = "verify"))] use crate::{error::Error, tree::CryptoHash}; +use crate::{ + proofs::chunk::chunk::{LEFT, RIGHT}, + Link, + TreeFeatureType::SummedMerk, +}; #[cfg(any(feature = "full", feature = "verify"))] /// Contains a tree's child node and its hash. The hash can always be assumed to @@ -55,6 +60,35 @@ pub struct Child { pub hash: CryptoHash, } +impl Child { + pub fn as_link(&self) -> Link { + let (key, sum) = match &self.tree.node { + Node::KV(key, _) | Node::KVValueHash(key, ..) => (key.as_slice(), None), + Node::KVValueHashFeatureType(key, _, _, feature_type) => { + let sum_value = match feature_type { + SummedMerk(sum) => Some(sum.clone()), + _ => None, + }; + (key.as_slice(), sum_value) + } + // for the connection between the trunk and leaf chunks, we don't + // have the child key so we must first write in an empty one. once + // the leaf gets verified, we can write in this key to its parent + _ => (&[] as &[u8], None), + }; + + Link::Reference { + hash: self.hash, + sum, + child_heights: ( + self.tree.child_heights.0 as u8, + self.tree.child_heights.1 as u8, + ), + key: key.to_vec(), + } + } +} + #[cfg(any(feature = "full", feature = "verify"))] /// A binary tree data structure used to represent a select subset of a tree /// when verifying Merkle proofs. @@ -68,6 +102,8 @@ pub struct Tree { pub right: Option, /// Height pub height: usize, + /// Child Heights + pub child_heights: (usize, usize), } #[cfg(any(feature = "full", feature = "verify"))] @@ -79,6 +115,7 @@ impl From for Tree { left: None, right: None, height: 1, + child_heights: (0, 0), } } } @@ -167,6 +204,42 @@ impl Tree { Ok(()) } + #[cfg(feature = "full")] + /// Does an in-order traversal over references to all the nodes in the tree, + /// calling `visit_node` for each with the current traversal path. 
+ pub fn visit_refs_track_traversal_and_parent< + F: FnMut(&Self, &mut Vec, Option<&[u8]>) -> Result<(), Error>, + >( + &self, + base_traversal_instruction: &mut Vec, + parent_key: Option<&[u8]>, + visit_node: &mut F, + ) -> Result<(), Error> { + if let Some(child) = &self.left { + base_traversal_instruction.push(LEFT); + child.tree.visit_refs_track_traversal_and_parent( + base_traversal_instruction, + Some(self.key()), + visit_node, + )?; + base_traversal_instruction.pop(); + } + + visit_node(self, base_traversal_instruction, parent_key)?; + + if let Some(child) = &self.right { + base_traversal_instruction.push(RIGHT); + child.tree.visit_refs_track_traversal_and_parent( + base_traversal_instruction, + Some(self.key()), + visit_node, + )?; + base_traversal_instruction.pop(); + } + + Ok(()) + } + /// Returns an immutable reference to the child on the given side, if any. #[cfg(any(feature = "full", feature = "verify"))] pub const fn child(&self, left: bool) -> Option<&Child> { @@ -202,6 +275,13 @@ impl Tree { self.height = self.height.max(child.height + 1); + // update child height + if left { + self.child_heights.0 = child.height; + } else { + self.child_heights.1 = child.height; + } + let hash = child.hash().unwrap_add_cost(&mut cost); let tree = Box::new(child); *self.child_mut(left) = Some(Child { tree, hash }); @@ -238,13 +318,24 @@ impl Tree { _ => panic!("Expected node to be type KV"), } } + + #[cfg(feature = "full")] + pub(crate) fn sum(&self) -> Option { + match self.node { + Node::KVValueHashFeatureType(.., feature_type) => match feature_type { + SummedMerk(sum) => Some(sum), + _ => None, + }, + _ => panic!("Expected node to be type KVValueHashFeatureType"), + } + } } #[cfg(feature = "full")] /// `LayerIter` iterates over the nodes in a `Tree` at a given depth. Nodes are /// visited in order. pub struct LayerIter<'a> { - stack: Vec<&'a Tree>, + stack: Vec<(&'a Tree, usize)>, depth: usize, } @@ -257,25 +348,9 @@ impl<'a> LayerIter<'a> { depth, }; - iter.traverse_to_start(tree, depth); + iter.stack.push((tree, 0)); iter } - - /// Builds up the stack by traversing through left children to the desired - /// depth. 
- fn traverse_to_start(&mut self, tree: &'a Tree, remaining_depth: usize) { - self.stack.push(tree); - - if remaining_depth == 0 { - return; - } - - if let Some(child) = tree.child(true) { - self.traverse_to_start(&child.tree, remaining_depth - 1) - } else { - panic!("Could not traverse to given layer") - } - } } #[cfg(feature = "full")] @@ -283,32 +358,21 @@ impl<'a> Iterator for LayerIter<'a> { type Item = &'a Tree; fn next(&mut self) -> Option { - let item = self.stack.pop(); - let mut popped = item; - - loop { - if self.stack.is_empty() { - return item; - } - - let parent = self.stack.last().unwrap(); - let left_child = parent.child(true).unwrap(); - let right_child = parent.child(false).unwrap(); - - if left_child.tree.as_ref() == popped.unwrap() { - self.stack.push(&right_child.tree); - - while self.stack.len() - 1 < self.depth { - let parent = self.stack.last().unwrap(); - let left_child = parent.child(true).unwrap(); - self.stack.push(&left_child.tree); + while !self.stack.is_empty() { + let (item, item_depth) = self.stack.pop().expect("confirmed not None"); + if item_depth != self.depth { + if let Some(right_child) = item.child(false) { + self.stack.push((&right_child.tree, item_depth + 1)) + } + if let Some(left_child) = item.child(true) { + self.stack.push((&left_child.tree, item_depth + 1)) } - - return item; } else { - popped = self.stack.pop(); + return Some(item); } } + + return None; } } @@ -471,7 +535,19 @@ where .wrap_with_cost(cost); } - Ok(stack.pop().unwrap()).wrap_with_cost(cost) + let tree = stack.pop().unwrap(); + + if tree.child_heights.0.max(tree.child_heights.1) + - tree.child_heights.0.min(tree.child_heights.1) + > 1 + { + return Err(Error::InvalidProofError( + "Expected proof to result in a valid avl tree".to_string(), + )) + .wrap_with_cost(cost); + } + + Ok(tree).wrap_with_cost(cost) } #[cfg(feature = "full")] @@ -555,4 +631,104 @@ mod test { } assert!(iter.next().is_none()); } + + #[test] + fn execute_non_avl_tree() { + let non_avl_tree_proof = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Parent, + ]; + let execution_result = + execute(non_avl_tree_proof.into_iter().map(Ok), false, |_| Ok(())).unwrap(); + assert!(execution_result.is_err()); + } + + #[test] + fn child_to_link() { + let basic_merk_tree = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Child, + ]; + let tree = execute(basic_merk_tree.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .unwrap(); + + let left_link = tree.left.as_ref().unwrap().as_link(); + let right_link = tree.right.as_ref().unwrap().as_link(); + + assert_eq!( + left_link, + Link::Reference { + hash: tree.left.as_ref().map(|node| node.hash).clone().unwrap(), + sum: None, + child_heights: (0, 0), + key: vec![1] + } + ); + + assert_eq!( + right_link, + Link::Reference { + hash: tree.right.as_ref().map(|node| node.hash).clone().unwrap(), + sum: None, + child_heights: (0, 0), + key: vec![3] + } + ); + + let sum_merk_tree = vec![ + Op::Push(Node::KVValueHashFeatureType( + vec![1], + vec![1], + [0; 32], + SummedMerk(3), + )), + Op::Push(Node::KVValueHashFeatureType( + vec![2], + vec![2], + [0; 32], + SummedMerk(1), + )), + Op::Parent, + Op::Push(Node::KVValueHashFeatureType( + vec![3], + vec![3], + [0; 32], + SummedMerk(1), + )), + Op::Child, + ]; + let tree = execute(sum_merk_tree.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + 
.unwrap(); + + let left_link = tree.left.as_ref().unwrap().as_link(); + let right_link = tree.right.as_ref().unwrap().as_link(); + + assert_eq!( + left_link, + Link::Reference { + hash: tree.left.as_ref().map(|node| node.hash).clone().unwrap(), + sum: Some(3), + child_heights: (0, 0), + key: vec![1] + } + ); + + assert_eq!( + right_link, + Link::Reference { + hash: tree.right.as_ref().map(|node| node.hash).clone().unwrap(), + sum: Some(1), + child_heights: (0, 0), + key: vec![3] + } + ); + } } diff --git a/merk/src/tree/link.rs b/merk/src/tree/link.rs index 56d9f1b0..f1b4b9bd 100644 --- a/merk/src/tree/link.rs +++ b/merk/src/tree/link.rs @@ -46,7 +46,7 @@ use crate::HASH_LENGTH_U32; #[cfg(feature = "full")] /// Represents a reference to a child tree node. Links may or may not contain /// the child's `Tree` instance (storing its key if not). -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum Link { /// Represents a child tree node which has been pruned from memory, only /// retaining a reference to it (its key). The child node can always be diff --git a/merk/src/tree/mod.rs b/merk/src/tree/mod.rs index c497b893..e9ac2799 100644 --- a/merk/src/tree/mod.rs +++ b/merk/src/tree/mod.rs @@ -98,7 +98,7 @@ use crate::{error::Error, Error::Overflow}; #[cfg(feature = "full")] /// The fields of the `Tree` type, stored on the heap. -#[derive(Clone, Encode, Decode, Debug)] +#[derive(Clone, Encode, Decode, Debug, PartialEq)] pub struct TreeInner { pub(crate) left: Option, pub(crate) right: Option, @@ -137,7 +137,7 @@ impl Terminated for Box {} /// Trees' inner fields are stored on the heap so that nodes can recursively /// link to each other, and so we can detach nodes from their parents, then /// reattach without allocating or freeing heap memory. 
-#[derive(Clone)] +#[derive(Clone, PartialEq)] pub struct Tree { pub(crate) inner: Box, pub(crate) old_size_with_parent_to_child_hook: u32, From fcfe5ae5ff8e7d49c996b1d3c06a8acf237e88c9 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Mon, 2 Oct 2023 08:34:57 +0100 Subject: [PATCH 3/7] wip --- grovedb/src/lib.rs | 2 - grovedb/src/replication.rs | 989 ------------------------------------- merk/src/merk/mod.rs | 9 - 3 files changed, 1000 deletions(-) delete mode 100644 grovedb/src/replication.rs diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 23840455..0c3c7413 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -215,8 +215,6 @@ use grovedb_storage::{Storage, StorageContext}; use grovedb_visualize::DebugByteVectors; #[cfg(any(feature = "full", feature = "verify"))] pub use query::{PathQuery, SizedQuery}; -// #[cfg(feature = "full")] -// pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer}; #[cfg(any(feature = "full", feature = "verify"))] pub use crate::error::Error; diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs deleted file mode 100644 index e97f7820..00000000 --- a/grovedb/src/replication.rs +++ /dev/null @@ -1,989 +0,0 @@ -// // MIT LICENSE -// // -// // Copyright (c) 2021 Dash Core Group -// // -// // Permission is hereby granted, free of charge, to any -// // person obtaining a copy of this software and associated -// // documentation files (the "Software"), to deal in the -// // Software without restriction, including without -// // limitation the rights to use, copy, modify, merge, -// // publish, distribute, sublicense, and/or sell copies of -// // the Software, and to permit persons to whom the Software -// // is furnished to do so, subject to the following -// // conditions: -// // -// // The above copyright notice and this permission notice -// // shall be included in all copies or substantial portions -// // of the Software. -// // -// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// // DEALINGS IN THE SOFTWARE. -// -// //! Replication -// -// use std::{ -// collections::VecDeque, -// iter::{empty, once}, -// }; -// -// use grovedb_merk::{ -// proofs::{Node, Op}, -// Merk, TreeFeatureType, -// }; -// use grovedb_path::SubtreePath; -// use grovedb_storage::{ -// rocksdb_storage::{PrefixedRocksDbImmediateStorageContext, PrefixedRocksDbStorageContext}, -// Storage, StorageContext, -// }; -// -// use crate::{Element, Error, GroveDb, Hash, Transaction}; -// -// const OPS_PER_CHUNK: usize = 128; -// -// impl GroveDb { -// /// Creates a chunk producer to replicate GroveDb. -// pub fn chunks(&self) -> SubtreeChunkProducer { -// SubtreeChunkProducer::new(self) -// } -// } -// -// /// Subtree chunks producer. 
-// pub struct SubtreeChunkProducer<'db> { -// grove_db: &'db GroveDb, -// cache: Option>, -// } -// -// struct SubtreeChunkProducerCache<'db> { -// current_merk_path: Vec>, -// current_merk: Merk>, -// // This needed to be an `Option` because it requires a reference on Merk but it's within the -// // same struct and during struct init a referenced Merk would be moved inside a struct, -// // using `Option` this init happens in two steps. -// current_chunk_producer: -// Option>>, -// } -// -// impl<'db> SubtreeChunkProducer<'db> { -// fn new(storage: &'db GroveDb) -> Self { -// SubtreeChunkProducer { -// grove_db: storage, -// cache: None, -// } -// } -// -// /// Chunks in current producer -// pub fn chunks_in_current_producer(&self) -> usize { -// self.cache -// .as_ref() -// .and_then(|c| c.current_chunk_producer.as_ref().map(|p| p.len())) -// .unwrap_or(0) -// } -// -// /// Get chunk -// pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> -// where -// P: IntoIterator, -//

::IntoIter: Clone + DoubleEndedIterator, -// { -// let path_iter = path.into_iter(); -// -// if let Some(SubtreeChunkProducerCache { -// current_merk_path, .. -// }) = &self.cache -// { -// if !itertools::equal(current_merk_path, path_iter.clone()) { -// self.cache = None; -// } -// } -// -// if self.cache.is_none() { -// let current_merk = self -// .grove_db -// .open_non_transactional_merk_at_path( -// path_iter.clone().collect::>().as_slice().into(), -// None, -// ) -// .unwrap()?; -// -// if current_merk.root_key().is_none() { -// return Ok(Vec::new()); -// } -// -// self.cache = Some(SubtreeChunkProducerCache { -// current_merk_path: path_iter.map(|p| p.to_vec()).collect(), -// current_merk, -// current_chunk_producer: None, -// }); -// let cache = self.cache.as_mut().expect("exists at this point"); -// cache.current_chunk_producer = Some( -// grovedb_merk::ChunkProducer::new(&cache.current_merk) -// .map_err(|e| Error::CorruptedData(e.to_string()))?, -// ); -// } -// -// self.cache -// .as_mut() -// .expect("must exist at this point") -// .current_chunk_producer -// .as_mut() -// .expect("must exist at this point") -// .chunk(index) -// .map_err(|e| Error::CorruptedData(e.to_string())) -// } -// } -// -// // TODO: make generic over storage_cost context -// type MerkRestorer<'db> = grovedb_merk::Restorer>; -// -// type Path = Vec>; -// -// /// Structure to drive GroveDb restore process. -// pub struct Restorer<'db> { -// current_merk_restorer: Option>, -// current_merk_chunk_index: usize, -// current_merk_path: Path, -// queue: VecDeque<(Path, Vec, Hash, TreeFeatureType)>, -// grove_db: &'db GroveDb, -// tx: &'db Transaction<'db>, -// } -// -// /// Indicates what next piece of information `Restorer` expects or wraps a -// /// successful result. -// #[derive(Debug)] -// pub enum RestorerResponse { -// AwaitNextChunk { path: Vec>, index: usize }, -// Ready, -// } -// -// #[derive(Debug)] -// pub struct RestorerError(String); -// -// impl<'db> Restorer<'db> { -// /// Create a GroveDb restorer using a backing storage_cost and root hash. -// pub fn new( -// grove_db: &'db GroveDb, -// root_hash: Hash, -// tx: &'db Transaction<'db>, -// ) -> Result { -// Ok(Restorer { -// tx, -// current_merk_restorer: Some(MerkRestorer::new( -// Merk::open_base( -// grove_db -// .db -// .get_immediate_storage_context(SubtreePath::empty(), tx) -// .unwrap(), -// false, -// ) -// .unwrap() -// .map_err(|e| RestorerError(e.to_string()))?, -// None, -// root_hash, -// )), -// current_merk_chunk_index: 0, -// current_merk_path: vec![], -// queue: VecDeque::new(), -// grove_db, -// }) -// } -// -// /// Process next chunk and receive instruction on what to do next. -// pub fn process_chunk( -// &mut self, -// chunk_ops: impl IntoIterator, -// ) -> Result { -// if self.current_merk_restorer.is_none() { -// // Last restorer was consumed and no more Merks to process. -// return Ok(RestorerResponse::Ready); -// } -// // First we decode a chunk to take out info about nested trees to add them into -// // todo list. -// let mut ops = Vec::new(); -// for op in chunk_ops { -// ops.push(op); -// match ops.last().expect("just inserted") { -// Op::Push(Node::KVValueHashFeatureType( -// key, -// value_bytes, -// value_hash, -// feature_type, -// )) -// | Op::PushInverted(Node::KVValueHashFeatureType( -// key, -// value_bytes, -// value_hash, -// feature_type, -// )) => { -// if let Element::Tree(root_key, _) | Element::SumTree(root_key, ..) 
= -// Element::deserialize(value_bytes) -// .map_err(|e| RestorerError(e.to_string()))? -// { -// if root_key.is_none() || self.current_merk_path.last() == Some(key) { -// // We add only subtrees of the current subtree to queue, skipping -// // itself; Also skipping empty Merks. -// continue; -// } -// let mut path = self.current_merk_path.clone(); -// path.push(key.clone()); -// // The value hash is the root tree hash -// self.queue.push_back(( -// path, -// value_bytes.to_owned(), -// *value_hash, -// *feature_type, -// )); -// } -// } -// _ => {} -// } -// } -// -// // Process chunk using Merk's possibilities. -// let remaining = self -// .current_merk_restorer -// .as_mut() -// .expect("restorer exists at this point") -// .process_chunk(ops) -// .map_err(|e| RestorerError(e.to_string()))?; -// -// self.current_merk_chunk_index += 1; -// -// if remaining == 0 { -// // If no more chunks for this Merk required decide if we're done or take a next -// // Merk to process. -// self.current_merk_restorer -// .take() -// .expect("restorer exists at this point") -// .finalize() -// .map_err(|e| RestorerError(e.to_string()))?; -// if let Some((next_path, combining_value, expected_hash, _)) = self.queue.pop_front() { -// // Process next subtree. -// let merk = self -// .grove_db -// .open_merk_for_replication(next_path.as_slice().into(), self.tx) -// .map_err(|e| RestorerError(e.to_string()))?; -// self.current_merk_restorer = Some(MerkRestorer::new( -// merk, -// Some(combining_value), -// expected_hash, -// )); -// self.current_merk_chunk_index = 0; -// self.current_merk_path = next_path; -// -// Ok(RestorerResponse::AwaitNextChunk { -// path: self.current_merk_path.clone(), -// index: self.current_merk_chunk_index, -// }) -// } else { -// Ok(RestorerResponse::Ready) -// } -// } else { -// // Request a chunk at the same path but with incremented index. -// Ok(RestorerResponse::AwaitNextChunk { -// path: self.current_merk_path.clone(), -// index: self.current_merk_chunk_index, -// }) -// } -// } -// } -// -// /// Chunk producer wrapper which uses bigger messages that may include chunks of -// /// requested subtree with its right siblings. -// /// -// /// Because `Restorer` builds GroveDb replica breadth-first way from top to -// /// bottom it makes sense to send a subtree's siblings next instead of its own -// /// subtrees. -// pub struct SiblingsChunkProducer<'db> { -// chunk_producer: SubtreeChunkProducer<'db>, -// } -// -// #[derive(Debug)] -// pub struct GroveChunk { -// subtree_chunks: Vec<(usize, Vec)>, -// } -// -// impl<'db> SiblingsChunkProducer<'db> { -// /// New -// pub fn new(chunk_producer: SubtreeChunkProducer<'db>) -> Self { -// SiblingsChunkProducer { chunk_producer } -// } -// -// /// Get a collection of chunks possibly from different Merks with the first -// /// one as requested. -// pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> -// where -// P: IntoIterator, -//

::IntoIter: Clone + DoubleEndedIterator + ExactSizeIterator, -// { -// let path_iter = path.into_iter(); -// let mut result = Vec::new(); -// let mut ops_count = 0; -// -// if path_iter.len() == 0 { -// // We're at the root of GroveDb, no siblings here. -// self.process_subtree_chunks(&mut result, &mut ops_count, empty(), index)?; -// return Ok(result); -// }; -// -// // Get siblings on the right to send chunks of multiple Merks if it meets the -// // limit. -// -// let mut siblings_keys: VecDeque> = VecDeque::new(); -// -// let mut parent_path = path_iter; -// let requested_key = parent_path.next_back(); -// -// let parent_ctx = self -// .chunk_producer -// .grove_db -// .db -// .get_storage_context( -// parent_path.clone().collect::>().as_slice().into(), -// None, -// ) -// .unwrap(); -// let mut siblings_iter = Element::iterator(parent_ctx.raw_iter()).unwrap(); -// -// if let Some(key) = requested_key { -// siblings_iter.fast_forward(key)?; -// } -// -// while let Some(element) = siblings_iter.next_element().unwrap()? { -// if let (key, Element::Tree(..)) | (key, Element::SumTree(..)) = element { -// siblings_keys.push_back(key); -// } -// } -// -// let mut current_index = index; -// // Process each subtree -// while let Some(subtree_key) = siblings_keys.pop_front() { -// #[allow(clippy::map_identity)] -// let subtree_path = parent_path -// .clone() -// .map(|x| x) -// .chain(once(subtree_key.as_slice())); -// -// self.process_subtree_chunks(&mut result, &mut ops_count, subtree_path, current_index)?; -// // Going to a next sibling, should start from 0. -// -// if ops_count >= OPS_PER_CHUNK { -// break; -// } -// current_index = 0; -// } -// -// Ok(result) -// } -// -// /// Process one subtree's chunks -// fn process_subtree_chunks<'p, P>( -// &mut self, -// result: &mut Vec, -// ops_count: &mut usize, -// subtree_path: P, -// from_index: usize, -// ) -> Result<(), Error> -// where -// P: IntoIterator, -//

::IntoIter: Clone + DoubleEndedIterator, -// { -// let path_iter = subtree_path.into_iter(); -// -// let mut current_index = from_index; -// let mut subtree_chunks = Vec::new(); -// -// loop { -// let ops = self -// .chunk_producer -// .get_chunk(path_iter.clone(), current_index)?; -// -// *ops_count += ops.len(); -// subtree_chunks.push((current_index, ops)); -// current_index += 1; -// if current_index >= self.chunk_producer.chunks_in_current_producer() -// || *ops_count >= OPS_PER_CHUNK -// { -// break; -// } -// } -// -// result.push(GroveChunk { subtree_chunks }); -// -// Ok(()) -// } -// } -// -// /// `Restorer` wrapper that applies multiple chunks at once and eventually -// /// returns less requests. It is named by analogy with IO types that do less -// /// syscalls. -// pub struct BufferedRestorer<'db> { -// restorer: Restorer<'db>, -// } -// -// impl<'db> BufferedRestorer<'db> { -// /// New -// pub fn new(restorer: Restorer<'db>) -> Self { -// BufferedRestorer { restorer } -// } -// -// /// Process next chunk and receive instruction on what to do next. -// pub fn process_grove_chunks(&mut self, chunks: I) -> Result -// where -// I: IntoIterator + ExactSizeIterator, -// { -// let mut response = RestorerResponse::Ready; -// -// for c in chunks.into_iter() { -// for ops in c.subtree_chunks.into_iter().map(|x| x.1) { -// if !ops.is_empty() { -// response = self.restorer.process_chunk(ops)?; -// } -// } -// } -// -// Ok(response) -// } -// } -// -// // #[cfg(test)] -// // mod test { -// // use rand::RngCore; -// // use tempfile::TempDir; -// // -// // use super::*; -// // use crate::{ -// // batch::GroveDbOp, -// // reference_path::ReferencePathType, -// // tests::{common::EMPTY_PATH, make_test_grovedb, TempGroveDb, -// // ANOTHER_TEST_LEAF, TEST_LEAF}, }; -// // -// // fn replicate(original_db: &GroveDb) -> TempDir { -// // let replica_tempdir = TempDir::new().unwrap(); -// // -// // { -// // let replica_db = GroveDb::open(replica_tempdir.path()).unwrap(); -// // let mut chunk_producer = original_db.chunks(); -// // let tx = replica_db.start_transaction(); -// // -// // let mut restorer = Restorer::new( -// // &replica_db, -// // original_db.root_hash(None).unwrap().unwrap(), -// // &tx, -// // ) -// // .expect("cannot create restorer"); -// // -// // That means root tree chunk with index 0 -// // let mut next_chunk: (Vec>, usize) = (vec![], 0); -// // -// // loop { -// // let chunk = chunk_producer -// // .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) -// // .expect("cannot get next chunk"); -// // match restorer.process_chunk(chunk).expect("cannot process chunk") { -// // RestorerResponse::Ready => break, -// // RestorerResponse::AwaitNextChunk { path, index } => { -// // next_chunk = (path, index); -// // } -// // } -// // } -// // -// // replica_db.commit_transaction(tx).unwrap().unwrap(); -// // } -// // replica_tempdir -// // } -// // -// // fn replicate_bigger_messages(original_db: &GroveDb) -> TempDir { -// // let replica_tempdir = TempDir::new().unwrap(); -// // -// // { -// // let replica_grove_db = GroveDb::open(replica_tempdir.path()).unwrap(); -// // let mut chunk_producer = SiblingsChunkProducer::new(original_db.chunks()); -// // let tx = replica_grove_db.start_transaction(); -// // -// // let mut restorer = BufferedRestorer::new( -// // Restorer::new( -// // &replica_grove_db, -// // original_db.root_hash(None).unwrap().unwrap(), -// // &tx, -// // ) -// // .expect("cannot create restorer"), -// // ); -// // -// // That means root tree chunk with 
index 0 -// // let mut next_chunk: (Vec>, usize) = (vec![], 0); -// // -// // loop { -// // let chunks = chunk_producer -// // .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) -// // .expect("cannot get next chunk"); -// // match restorer -// // .process_grove_chunks(chunks.into_iter()) -// // .expect("cannot process chunk") -// // { -// // RestorerResponse::Ready => break, -// // RestorerResponse::AwaitNextChunk { path, index } => { -// // next_chunk = (path, index); -// // } -// // } -// // } -// // -// // replica_grove_db.commit_transaction(tx).unwrap().unwrap(); -// // } -// // -// // replica_tempdir -// // } -// // -// // fn test_replication_internal<'a, I, R, F>( -// // original_db: &TempGroveDb, -// // to_compare: I, -// // replicate_fn: F, -// // ) where -// // R: AsRef<[u8]> + 'a, -// // I: Iterator, -// // F: Fn(&GroveDb) -> TempDir, -// // { -// // let expected_root_hash = original_db.root_hash(None).unwrap().unwrap(); -// // -// // let replica_tempdir = replicate_fn(original_db); -// // -// // let replica = GroveDb::open(replica_tempdir.path()).unwrap(); -// // assert_eq!( -// // replica.root_hash(None).unwrap().unwrap(), -// // expected_root_hash -// // ); -// // -// // for full_path in to_compare { -// // let (key, path) = full_path.split_last().unwrap(); -// // assert_eq!( -// // original_db.get(path, key.as_ref(), None).unwrap().unwrap(), -// // replica.get(path, key.as_ref(), None).unwrap().unwrap() -// // ); -// // } -// // } -// // -// // fn test_replication<'a, I, R>(original_db: &TempGroveDb, to_compare: I) -// // where -// // R: AsRef<[u8]> + 'a, -// // I: Iterator + Clone, -// // { -// // test_replication_internal(original_db, to_compare.clone(), replicate); -// // test_replication_internal(original_db, to_compare, -// // replicate_bigger_messages); } -// // -// // #[test] -// // fn replicate_wrong_root_hash() { -// // let db = make_test_grovedb(); -// // let mut bad_hash = db.root_hash(None).unwrap().unwrap(); -// // bad_hash[0] = bad_hash[0].wrapping_add(1); -// // -// // let tmp_dir = TempDir::new().unwrap(); -// // let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); -// // let tx = restored_db.start_transaction(); -// // let mut restorer = Restorer::new(&restored_db, bad_hash, &tx).unwrap(); -// // let mut chunks = db.chunks(); -// // assert!(restorer -// // .process_chunk(chunks.get_chunk([], 0).unwrap()) -// // .is_err()); -// // } -// // -// // #[test] -// // fn replicate_provide_wrong_tree() { -// // let db = make_test_grovedb(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayya".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayyb".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // -// // let expected_hash = db.root_hash(None).unwrap().unwrap(); -// // -// // let tmp_dir = TempDir::new().unwrap(); -// // let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); -// // let tx = restored_db.start_transaction(); -// // let mut restorer = Restorer::new(&restored_db, expected_hash, &tx).unwrap(); -// // let mut chunks = db.chunks(); -// // -// // let next_op = restorer -// // .process_chunk(chunks.get_chunk([], 0).unwrap()) -// // .unwrap(); -// // match next_op { -// // RestorerResponse::AwaitNextChunk { path, index } => { -// // Feed restorer a wrong Merk! 
-// // let chunk = if path == [TEST_LEAF] { -// // chunks.get_chunk([ANOTHER_TEST_LEAF], index).unwrap() -// // } else { -// // chunks.get_chunk([TEST_LEAF], index).unwrap() -// // }; -// // assert!(restorer.process_chunk(chunk).is_err()); -// // } -// // _ => {} -// // } -// // } -// // -// // #[test] -// // fn replicate_nested_grovedb() { -// // let db = make_test_grovedb(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayya".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[TEST_LEAF], -// // b"key2", -// // Element::new_reference(ReferencePathType::SiblingReference(b"key1". -// // to_vec())), None, -// // None, -// // ) -// // .unwrap() -// // .expect("should insert reference"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key2", -// // Element::empty_tree(), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2"], -// // b"key3", -// // Element::empty_tree(), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2", b"key3"], -// // b"key4", -// // Element::new_item(b"ayyb".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // -// // let to_compare = [ -// // [TEST_LEAF].as_ref(), -// // [TEST_LEAF, b"key1"].as_ref(), -// // [TEST_LEAF, b"key2"].as_ref(), -// // [ANOTHER_TEST_LEAF].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), -// // ]; -// // test_replication(&db, to_compare.into_iter()); -// // } -// // -// // #[test] -// // fn replicate_nested_grovedb_with_sum_trees() { -// // let db = make_test_grovedb(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayya".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[TEST_LEAF], -// // b"key2", -// // Element::new_reference(ReferencePathType::SiblingReference(b"key1". 
-// // to_vec())), None, -// // None, -// // ) -// // .unwrap() -// // .expect("should insert reference"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key2", -// // Element::empty_sum_tree(), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2"], -// // b"sumitem", -// // Element::new_sum_item(15), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2"], -// // b"key3", -// // Element::empty_tree(), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2", b"key3"], -// // b"key4", -// // Element::new_item(b"ayyb".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // -// // let to_compare = [ -// // [TEST_LEAF].as_ref(), -// // [TEST_LEAF, b"key1"].as_ref(), -// // [TEST_LEAF, b"key2"].as_ref(), -// // [ANOTHER_TEST_LEAF].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"sumitem"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), -// // ]; -// // test_replication(&db, to_compare.into_iter()); -// // } -// // -// // TODO: Highlights a bug in replication -// // #[test] -// // fn replicate_grovedb_with_sum_tree() { -// // let db = make_test_grovedb(); -// // db.insert(&[TEST_LEAF], b"key1", Element::empty_tree(), None, None) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[TEST_LEAF, b"key1"], -// // b"key2", -// // Element::new_item(vec![4]), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[TEST_LEAF, b"key1"], -// // b"key3", -// // Element::new_item(vec![10]), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // -// // let to_compare = [ -// // [TEST_LEAF].as_ref(), -// // [ANOTHER_TEST_LEAF].as_ref(), -// // [TEST_LEAF, b"key1"].as_ref(), -// // [TEST_LEAF, b"key1", b"key2"].as_ref(), -// // [TEST_LEAF, b"key1", b"key3"].as_ref(), -// // ]; -// // test_replication(&db, to_compare.into_iter()); -// // } -// // -// // #[test] -// // fn replicate_a_big_one() { -// // const HEIGHT: usize = 3; -// // const SUBTREES_FOR_EACH: usize = 3; -// // const SCALARS_FOR_EACH: usize = 600; -// // -// // let db = make_test_grovedb(); -// // let mut to_compare = Vec::new(); -// // -// // let mut rng = rand::thread_rng(); -// // let mut subtrees: VecDeque> = VecDeque::new(); -// // -// // Generate root tree leafs -// // for _ in 0..SUBTREES_FOR_EACH { -// // let mut bytes = [0; 8]; -// // rng.fill_bytes(&mut bytes); -// // db.insert(EMPTY_PATH, &bytes, Element::empty_tree(), None, None) -// // .unwrap() -// // .unwrap(); -// // subtrees.push_front(vec![bytes]); -// // to_compare.push(vec![bytes]); -// // } -// // -// // while let Some(path) = subtrees.pop_front() { -// // let mut batch = Vec::new(); -// // -// // if path.len() < HEIGHT { -// // for _ in 0..SUBTREES_FOR_EACH { -// // let mut bytes = [0; 8]; -// // rng.fill_bytes(&mut bytes); -// // -// // batch.push(GroveDbOp::insert_op( -// // path.iter().map(|x| x.to_vec()).collect(), -// // bytes.to_vec(), -// // Element::empty_tree(), -// // )); -// // -// // let mut new_path = path.clone(); -// // new_path.push(bytes); -// 
// subtrees.push_front(new_path.clone()); -// // to_compare.push(new_path.clone()); -// // } -// // } -// // -// // for _ in 0..SCALARS_FOR_EACH { -// // let mut bytes = [0; 8]; -// // let mut bytes_val = vec![]; -// // rng.fill_bytes(&mut bytes); -// // rng.fill_bytes(&mut bytes_val); -// // -// // batch.push(GroveDbOp::insert_op( -// // path.iter().map(|x| x.to_vec()).collect(), -// // bytes.to_vec(), -// // Element::new_item(bytes_val), -// // )); -// // -// // let mut new_path = path.clone(); -// // new_path.push(bytes); -// // to_compare.push(new_path.clone()); -// // } -// // -// // db.apply_batch(batch, None, None).unwrap().unwrap(); -// // } -// // -// // test_replication(&db, to_compare.iter().map(|x| x.as_slice())); -// // } -// // -// // #[test] -// // fn replicate_from_checkpoint() { -// // Create a simple GroveDb first -// // let db = make_test_grovedb(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayya".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .unwrap(); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key2", -// // Element::new_item(b"ayyb".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .unwrap(); -// // -// // Save its state with checkpoint -// // let checkpoint_dir_parent = TempDir::new().unwrap(); -// // let checkpoint_dir = checkpoint_dir_parent.path().join("cp"); -// // db.create_checkpoint(&checkpoint_dir).unwrap(); -// // -// // Alter the db to make difference between current state and checkpoint -// // db.delete(&[TEST_LEAF], b"key1", None, None) -// // .unwrap() -// // .unwrap(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key3", -// // Element::new_item(b"ayyd".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .unwrap(); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key2", -// // Element::new_item(b"ayyc".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .unwrap(); -// // -// // let checkpoint_db = GroveDb::open(&checkpoint_dir).unwrap(); -// // -// // Ensure checkpoint differs from current state -// // assert_ne!( -// // checkpoint_db -// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // db.get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // ); -// // -// // Build a replica from checkpoint -// // let replica_dir = replicate(&checkpoint_db); -// // let replica_db = GroveDb::open(&replica_dir).unwrap(); -// // -// // assert_eq!( -// // checkpoint_db.root_hash(None).unwrap().unwrap(), -// // replica_db.root_hash(None).unwrap().unwrap() -// // ); -// // -// // assert_eq!( -// // checkpoint_db -// // .get(&[TEST_LEAF], b"key1", None) -// // .unwrap() -// // .unwrap(), -// // replica_db -// // .get(&[TEST_LEAF], b"key1", None) -// // .unwrap() -// // .unwrap(), -// // ); -// // assert_eq!( -// // checkpoint_db -// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // replica_db -// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // ); -// // assert!(matches!( -// // replica_db.get(&[TEST_LEAF], b"key3", None).unwrap(), -// // Err(Error::PathKeyNotFound(_)) -// // )); -// // -// // Drop original db and checkpoint dir too to ensure there is no dependency -// // drop(db); -// // drop(checkpoint_db); -// // drop(checkpoint_dir); -// // -// // assert_eq!( -// // replica_db -// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // 
Element::new_item(b"ayyb".to_vec()) -// // ); -// // } -// // } diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 56308a64..4ddf64a2 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -621,15 +621,6 @@ where }) } - // TODO: remove this - // /// Returns a clone of the Tree instance in Merk - // pub fn get_root_tree(&self) -> Option { - // self.use_tree(|tree| match tree { - // None => None, - // Some(tree) => Some(tree.clone()), - // }) - // } - /// Returns the root non-prefixed key of the tree. If the tree is empty, /// None. pub fn root_key(&self) -> Option> { From cf7963f5b03ff27df54e0d5deb15ef952d6d9668 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Tue, 3 Oct 2023 08:25:18 +0100 Subject: [PATCH 4/7] wip --- grovedb/src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 0c3c7413..6ca3bf80 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -159,8 +159,6 @@ mod query; pub mod query_result_type; #[cfg(any(feature = "full", feature = "verify"))] pub mod reference_path; -#[cfg(feature = "full")] -mod replication; #[cfg(all(test, feature = "full"))] mod tests; #[cfg(feature = "full")] From 467e1215c6f23c17f58d9aad5f442d25bf50dbdc Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Tue, 3 Oct 2023 08:27:26 +0100 Subject: [PATCH 5/7] wip --- merk/src/lib.rs | 2 +- merk/src/merk/{chunks.rs => chunks2.rs} | 2 +- merk/src/merk/mod.rs | 6 +++--- merk/src/merk/{restore.rs => restore2.rs} | 6 +++--- merk/src/proofs/chunk.rs | 2 +- merk/src/proofs/chunk/{chunk.rs => chunk2.rs} | 2 +- merk/src/proofs/chunk/chunk_op.rs | 2 +- merk/src/proofs/chunk/util.rs | 4 ++-- merk/src/proofs/tree.rs | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) rename merk/src/merk/{chunks.rs => chunks2.rs} (99%) rename merk/src/merk/{restore.rs => restore2.rs} (99%) rename merk/src/proofs/chunk/{chunk.rs => chunk2.rs} (99%) diff --git a/merk/src/lib.rs b/merk/src/lib.rs index 5a858dfc..5b82876f 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -38,7 +38,7 @@ extern crate core; mod merk; #[cfg(feature = "full")] -pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions}; +pub use crate::merk::{chunks2::ChunkProducer, options::MerkOptions}; /// Provides a container type that allows temporarily taking ownership of a /// value. diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks2.rs similarity index 99% rename from merk/src/merk/chunks.rs rename to merk/src/merk/chunks2.rs index 51521ced..84d01e3e 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks2.rs @@ -432,7 +432,7 @@ mod test { use super::*; use crate::{ proofs::{ - chunk::chunk::{ + chunk::chunk2::{ tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, LEFT, RIGHT, }, diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 4ddf64a2..a92bc2a8 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -28,10 +28,10 @@ //! 
Merk -pub mod chunks; +pub mod chunks2; pub(crate) mod defaults; pub mod options; -pub mod restore; +pub mod restore2; use std::{ cell::Cell, @@ -59,7 +59,7 @@ use crate::{ }, proofs::{ chunk::{ - chunk::{LEFT, RIGHT}, + chunk2::{LEFT, RIGHT}, util::traversal_instruction_as_string, }, encode_into, diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore2.rs similarity index 99% rename from merk/src/merk/restore.rs rename to merk/src/merk/restore2.rs index 0b866cac..d82d3e40 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore2.rs @@ -38,7 +38,7 @@ use crate::{ merk::MerkSource, proofs::{ chunk::{ - chunk::{LEFT, RIGHT}, + chunk2::{LEFT, RIGHT}, chunk_op::ChunkOp, error::{ChunkError, ChunkError::InternalError}, util::{ @@ -475,10 +475,10 @@ mod tests { use super::*; use crate::{ execute_proof, - merk::chunks::ChunkProducer, + merk::chunks2::ChunkProducer, proofs::{ chunk::{ - chunk::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, + chunk2::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, error::ChunkError::InvalidChunkProof, }, Query, diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 28114876..3768559d 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -53,7 +53,7 @@ use crate::{ mod binary_range; #[cfg(feature = "full")] -pub mod chunk; +pub mod chunk2; pub mod chunk_op; pub mod error; #[cfg(feature = "full")] diff --git a/merk/src/proofs/chunk/chunk.rs b/merk/src/proofs/chunk/chunk2.rs similarity index 99% rename from merk/src/proofs/chunk/chunk.rs rename to merk/src/proofs/chunk/chunk2.rs index 95c686b7..4b8a0548 100644 --- a/merk/src/proofs/chunk/chunk.rs +++ b/merk/src/proofs/chunk/chunk2.rs @@ -198,7 +198,7 @@ pub mod tests { use crate::{ proofs::{ - chunk::chunk::{verify_height_proof, LEFT, RIGHT}, + chunk::chunk2::{verify_height_proof, LEFT, RIGHT}, tree::execute, Node, Op, Op::Parent, diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 9402d3d5..535af055 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -126,7 +126,7 @@ mod test { use crate::proofs::{ chunk::{ - chunk::{LEFT, RIGHT}, + chunk2::{LEFT, RIGHT}, chunk_op::ChunkOp, }, Node, Op, diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs index 3e430acf..2ba21ee0 100644 --- a/merk/src/proofs/chunk/util.rs +++ b/merk/src/proofs/chunk/util.rs @@ -35,7 +35,7 @@ use std::io::Write; use crate::{proofs::chunk::binary_range::BinaryRange, Error}; use crate::{ proofs::chunk::{ - chunk::{LEFT, RIGHT}, + chunk2::{LEFT, RIGHT}, error::{ChunkError, ChunkError::BadTraversalInstruction}, }, Error::InternalError, @@ -372,7 +372,7 @@ mod test { use byteorder::LE; use super::*; - use crate::proofs::chunk::chunk::{LEFT, RIGHT}; + use crate::proofs::chunk::chunk2::{LEFT, RIGHT}; #[test] fn test_chunk_height_per_layer() { diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index 99e84827..528288d5 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -44,7 +44,7 @@ use crate::tree::{combine_hash, kv_digest_to_kv_hash, kv_hash, node_hash, value_ #[cfg(any(feature = "full", feature = "verify"))] use crate::{error::Error, tree::CryptoHash}; use crate::{ - proofs::chunk::chunk::{LEFT, RIGHT}, + proofs::chunk::chunk2::{LEFT, RIGHT}, Link, TreeFeatureType::SummedMerk, }; From b59e63aad69c8cda445af55da4014d2d06845122 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Tue, 3 Oct 2023 09:27:57 +0100 Subject: [PATCH 6/7] rename job --- merk/src/lib.rs | 2 +- 
merk/src/merk/{chunks2.rs => chunks.rs} | 2 +- merk/src/merk/mod.rs | 5 +- merk/src/merk/restore.rs | 1248 ++++++++++++++++ merk/src/merk/restore2.rs | 1249 ----------------- merk/src/proofs/chunk.rs | 2 +- merk/src/proofs/chunk/{chunk2.rs => chunk.rs} | 2 +- merk/src/proofs/chunk/chunk_op.rs | 2 +- merk/src/proofs/chunk/util.rs | 4 +- merk/src/proofs/tree.rs | 2 +- 10 files changed, 1258 insertions(+), 1260 deletions(-) rename merk/src/merk/{chunks2.rs => chunks.rs} (99%) delete mode 100644 merk/src/merk/restore2.rs rename merk/src/proofs/chunk/{chunk2.rs => chunk.rs} (99%) diff --git a/merk/src/lib.rs b/merk/src/lib.rs index e7e8bc23..adfde559 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -38,7 +38,7 @@ extern crate core; mod merk; #[cfg(feature = "full")] -pub use crate::merk::{chunks2::ChunkProducer, options::MerkOptions}; +pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions}; /// Provides a container type that allows temporarily taking ownership of a /// value. diff --git a/merk/src/merk/chunks2.rs b/merk/src/merk/chunks.rs similarity index 99% rename from merk/src/merk/chunks2.rs rename to merk/src/merk/chunks.rs index 84d01e3e..51521ced 100644 --- a/merk/src/merk/chunks2.rs +++ b/merk/src/merk/chunks.rs @@ -432,7 +432,7 @@ mod test { use super::*; use crate::{ proofs::{ - chunk::chunk2::{ + chunk::chunk::{ tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, LEFT, RIGHT, }, diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index e8a2a073..52e28ba0 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -28,11 +28,10 @@ //! Merk -pub mod chunks2; +pub mod chunks; pub(crate) mod defaults; pub mod options; -pub mod restore2; pub mod apply; pub mod clear; @@ -67,7 +66,7 @@ use crate::{ }, proofs::{ chunk::{ - chunk2::{LEFT, RIGHT}, + chunk::{LEFT, RIGHT}, util::traversal_instruction_as_string, }, encode_into, diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 8b137891..a4c747d1 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -1 +1,1249 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +//! Provides `Restorer`, which can create a replica of a Merk instance by +//! receiving chunk proofs. 
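+//!
+//! A rough usage sketch of the intended round trip, mirroring the tests at
+//! the bottom of this file (`source_merk` and `replica_merk` are illustrative
+//! placeholders):
+//!
+//! ```ignore
+//! let mut chunk_producer = ChunkProducer::new(&source_merk)?;
+//! let mut restorer = Restorer::new(replica_merk, source_merk.root_hash().unwrap());
+//!
+//! // "" is the chunk id of the root chunk; the producer returns the next id
+//! let mut chunk_id = Some("".to_string());
+//! while let Some(id) = chunk_id {
+//!     let (chunk, next_chunk_id) = chunk_producer.chunk(id.as_str())?;
+//!     restorer.process_chunk(id, chunk)?;
+//!     chunk_id = next_chunk_id;
+//! }
+//!
+//! // finalize returns the fully populated replica Merk
+//! let replica_merk = restorer.finalize()?;
+//! ```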
+
+use std::collections::BTreeMap;
+
+use grovedb_storage::{Batch, StorageContext};
+
+use crate::{
+    merk,
+    merk::MerkSource,
+    proofs::{
+        chunk::{
+            chunk::{LEFT, RIGHT},
+            chunk_op::ChunkOp,
+            error::{ChunkError, ChunkError::InternalError},
+            util::{
+                string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec,
+            },
+        },
+        tree::{execute, Child, Tree as ProofTree},
+        Node, Op,
+    },
+    tree::{kv::ValueDefinedCostType, RefWalker, TreeNode},
+    CryptoHash, Error,
+    Error::{CostsError, EdError, StorageError},
+    Link, Merk,
+    TreeFeatureType::{BasicMerkNode, SummedMerkNode},
+};
+
+/// Restorer handles verification of chunks and replication of Merk trees.
+/// Chunks can be processed in any order as long as their parent has already
+/// been processed.
+pub struct Restorer<S> {
+    merk: Merk<S>,
+    chunk_id_to_root_hash: BTreeMap<String, CryptoHash>,
+    // this is used to keep track of parents whose links need to be rewritten
+    parent_keys: BTreeMap<String, Vec<u8>>,
+}
+
+impl<'db, S: StorageContext<'db>> Restorer<S> {
+    /// Initializes a new chunk restorer with the expected root hash for the
+    /// first chunk
+    pub fn new(merk: Merk<S>, expected_root_hash: CryptoHash) -> Self {
+        let mut chunk_id_to_root_hash = BTreeMap::new();
+        chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash);
+
+        Self {
+            merk,
+            chunk_id_to_root_hash,
+            parent_keys: BTreeMap::new(),
+        }
+    }
+
+    // TODO: consider converting chunk id to a vec
+    /// Processes a chunk at some chunk id, returns the chunk ids of the
+    /// chunks that can be requested next
+    pub fn process_chunk(
+        &mut self,
+        chunk_id: String,
+        chunk: Vec<Op>,
+    ) -> Result<Vec<String>, Error> {
+        let expected_root_hash = self
+            .chunk_id_to_root_hash
+            .get(&chunk_id)
+            .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?;
+
+        let chunk_tree = Self::verify_chunk(chunk, expected_root_hash)?;
+
+        let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?;
+
+        if root_traversal_instruction.is_empty() {
+            self.merk.set_base_root_key(Some(chunk_tree.key().to_vec()));
+        } else {
+            // every non-root chunk has some associated parent with a placeholder link
+            // here we update the placeholder link to represent the true data
+            self.rewrite_parent_link(&chunk_id, &root_traversal_instruction, &chunk_tree)?;
+        }
+
+        // next up, we need to write the chunk and build the map again
+        let chunk_write_result = self.write_chunk(chunk_tree, &mut root_traversal_instruction);
+        if chunk_write_result.is_ok() {
+            // if we were able to successfully write the chunk, we can remove
+            // the chunk's expected root hash from our chunk id map
+            self.chunk_id_to_root_hash.remove(&chunk_id);
+        }
+
+        chunk_write_result
+    }
+
+    /// Process multi chunks (space optimized chunk proofs that can contain
+    /// multiple singular chunks)
+    pub fn process_multi_chunk(&mut self, multi_chunk: Vec<ChunkOp>) -> Result<Vec<String>, Error> {
+        let mut expect_chunk_id = true;
+        let mut chunk_ids = vec![];
+        let mut current_chunk_id: String = "".to_string();
+
+        for chunk_op in multi_chunk {
+            if (matches!(chunk_op, ChunkOp::ChunkId(..)) && !expect_chunk_id)
+                || (matches!(chunk_op, ChunkOp::Chunk(..)) && expect_chunk_id)
+            {
+                return Err(Error::ChunkRestoringError(ChunkError::InvalidMultiChunk(
+                    "invalid multi chunk ordering",
+                )));
+            }
+            match chunk_op {
+                ChunkOp::ChunkId(instructions) => {
+                    current_chunk_id = traversal_instruction_as_string(&instructions);
+                }
+                ChunkOp::Chunk(chunk) => {
+                    // TODO: remove clone
+                    let next_chunk_ids = self.process_chunk(current_chunk_id.clone(), chunk)?;
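+                    // collect the ids of the child chunks this chunk exposes
+                    // so the caller can request them next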
+                    chunk_ids.extend(next_chunk_ids);
+                }
+            }
+            expect_chunk_id = !expect_chunk_id;
+        }
+        Ok(chunk_ids)
+    }
+
+    /// Verifies the structure of a chunk and ensures the chunk matches the
+    /// expected root hash
+    fn verify_chunk(chunk: Vec<Op>, expected_root_hash: &CryptoHash) -> Result<ProofTree, Error> {
+        let chunk_len = chunk.len();
+        let mut kv_count = 0;
+        let mut hash_count = 0;
+
+        // build tree from ops
+        // ensure it is made only of KVValueHashFeatureType and Hash nodes, and count them
+        let tree = execute(chunk.clone().into_iter().map(Ok), false, |node| {
+            if matches!(node, Node::KVValueHashFeatureType(..)) {
+                kv_count += 1;
+                Ok(())
+            } else if matches!(node, Node::Hash(..)) {
+                hash_count += 1;
+                Ok(())
+            } else {
+                Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof(
+                    "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
+                )))
+            }
+        })
+        .unwrap()?;
+
+        // chunk len must be exactly kv_count + hash_count + parent_branch_count
+        // + child_branch_count; a proof over n nodes carries n - 1 branch ops,
+        // so that is 2n - 1
+        debug_assert_eq!(chunk_len, ((kv_count + hash_count) * 2) - 1);
+
+        // chunk structure verified, next verify root hash
+        if &tree.hash().unwrap() != expected_root_hash {
+            return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof(
+                "chunk doesn't match expected root hash",
+            )));
+        }
+
+        Ok(tree)
+    }
+
+    /// Write the verified chunk to storage
+    fn write_chunk(
+        &mut self,
+        chunk_tree: ProofTree,
+        traversal_instruction: &mut Vec<bool>,
+    ) -> Result<Vec<String>, Error> {
+        // this contains all the elements we want to write to storage
+        let mut batch = self.merk.storage.new_batch();
+        let mut new_chunk_ids = Vec::new();
+
+        chunk_tree.visit_refs_track_traversal_and_parent(
+            traversal_instruction,
+            None,
+            &mut |proof_node, node_traversal_instruction, parent_key| {
+                match &proof_node.node {
+                    Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => {
+                        // build tree from node value
+                        let mut tree = TreeNode::new_with_value_hash(
+                            key.clone(),
+                            value.clone(),
+                            value_hash.clone(),
+                            *feature_type,
+                        )
+                        .unwrap();
+
+                        // update tree links
+                        *tree.slot_mut(LEFT) = proof_node.left.as_ref().map(Child::as_link);
+                        *tree.slot_mut(RIGHT) = proof_node.right.as_ref().map(Child::as_link);
+
+                        // encode the node and add it to the batch
+                        let bytes = tree.encode();
+
+                        batch.put(key, &bytes, None, None).map_err(CostsError)
+                    }
+                    Node::Hash(hash) => {
+                        // the node hash points to the root of another chunk
+                        // we get the chunk id and add the hash to restorer state
+                        let chunk_id = traversal_instruction_as_string(node_traversal_instruction);
+                        new_chunk_ids.push(chunk_id.clone());
+                        self.chunk_id_to_root_hash
+                            .insert(chunk_id.clone(), hash.clone());
+                        // TODO: handle unwrap
+                        self.parent_keys
+                            .insert(chunk_id, parent_key.unwrap().to_owned());
+                        Ok(())
+                    }
+                    _ => {
+                        // we do nothing for other node types
+                        // verify_chunk is called before this, so this branch
+                        // should never be reached
+                        Ok(())
+                    }
+                }
+            },
+        )?;
+
+        // write the batch
+        self.merk
+            .storage
+            .commit_batch(batch)
+            .unwrap()
+            .map_err(StorageError)?;
+
+        Ok(new_chunk_ids)
+    }
+
+    /// When we process truncated chunks, the parents of Node::Hash nodes hold
+    /// invalid placeholder links.
+    /// When we get the actual chunk associated with a Node::Hash,
+    /// we need to update the parent link to reflect the correct data.
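+    /// Concretely, the parent's `Link::Reference` key and sum fields are
+    /// rewritten from the root node of the newly verified child chunk, as
+    /// the body below shows.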
+    fn rewrite_parent_link(
+        &mut self,
+        chunk_id: &str,
+        traversal_instruction: &[bool],
+        chunk_tree: &ProofTree,
+    ) -> Result<(), Error> {
+        let parent_key = self
+            .parent_keys
+            .get(chunk_id)
+            .ok_or(Error::ChunkRestoringError(InternalError(
+                "after successful chunk verification parent key should exist",
+            )))?;
+
+        let mut parent = merk::fetch_node(
+            &self.merk.storage,
+            parent_key.as_slice(),
+            None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
+        )?
+        .ok_or(Error::ChunkRestoringError(InternalError(
+            "cannot find expected parent in memory, most likely state corruption issue",
+        )))?;
+
+        let is_left = traversal_instruction
+            .last()
+            .expect("rewrite is only called when traversal_instruction is not empty");
+
+        let updated_key = chunk_tree.key();
+        let updated_sum = chunk_tree.sum();
+
+        if let Some(Link::Reference { key, sum, .. }) = parent.link_mut(is_left.clone()) {
+            *key = updated_key.to_vec();
+            *sum = updated_sum;
+        }
+
+        let parent_bytes = parent.encode();
+        self.merk
+            .storage
+            .put(parent_key, &parent_bytes, None, None)
+            .unwrap()
+            .map_err(StorageError)?;
+
+        self.parent_keys
+            .remove(chunk_id)
+            .expect("confirmed parent key exists above");
+
+        Ok(())
+    }
+
+    /// Node heights are not part of committed state, so the chunk producer
+    /// could lie about them. After replication we need to verify the heights
+    /// and, if they are invalid, recompute the correct values.
+    fn rewrite_heights(&mut self) -> Result<(), Error> {
+        fn rewrite_child_heights<'s, 'db, S: StorageContext<'db>>(
+            mut walker: RefWalker<MerkSource<'s, S>>,
+            batch: &mut <S as StorageContext<'db>>::Batch,
+        ) -> Result<(u8, u8), Error> {
+            // TODO: remove unwrap
+            let mut cloned_node = TreeNode::decode(
+                walker.tree().key().to_vec(),
+                walker.tree().encode().as_slice(),
+                None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
+            )
+            .unwrap();
+
+            let mut left_height = 0;
+            let mut right_height = 0;
+
+            if let Some(left_walker) = walker
+                .walk(LEFT, None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>)
+                .unwrap()?
+            {
+                let left_child_heights = rewrite_child_heights(left_walker, batch)?;
+                left_height = left_child_heights.0.max(left_child_heights.1) + 1;
+                *cloned_node.link_mut(LEFT).unwrap().child_heights_mut() = left_child_heights;
+            }
+
+            if let Some(right_walker) = walker
+                .walk(RIGHT, None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>)
+                .unwrap()?
+            {
+                let right_child_heights = rewrite_child_heights(right_walker, batch)?;
+                right_height = right_child_heights.0.max(right_child_heights.1) + 1;
+                *cloned_node.link_mut(RIGHT).unwrap().child_heights_mut() = right_child_heights;
+            }
+
+            let bytes = cloned_node.encode();
+            batch
+                .put(walker.tree().key(), &bytes, None, None)
+                .map_err(CostsError)?;
+
+            Ok((left_height, right_height))
+        }
+
+        let mut batch = self.merk.storage.new_batch();
+        // TODO: deal with unwrap
+        let mut tree = self.merk.tree.take().unwrap();
+        let walker = RefWalker::new(&mut tree, self.merk.source());
+
+        rewrite_child_heights(walker, &mut batch)?;
+
+        self.merk.tree.set(Some(tree));
+
+        self.merk
+            .storage
+            .commit_batch(batch)
+            .unwrap()
+            .map_err(StorageError)
+    }
+
+    /// Rebuild restoration state from partial storage state
+    fn attempt_state_recovery(&mut self) -> Result<(), Error> {
+        // TODO: think about the return type some more
+        let (bad_link_map, parent_keys) = self.merk.verify();
+        if !bad_link_map.is_empty() {
+            self.chunk_id_to_root_hash = bad_link_map;
+            self.parent_keys = parent_keys;
+        }
+
+        Ok(())
+    }
+
+    /// Consumes the `Restorer` and returns a newly created, fully populated
+    /// Merk instance.
+    /// This method will return an error if called before
+    /// processing all chunks.
+    pub fn finalize(mut self) -> Result<Merk<S>, Error> {
+        // ensure all chunks have been processed
+        if self.chunk_id_to_root_hash.len() != 0 || self.parent_keys.len() != 0 {
+            return Err(Error::ChunkRestoringError(
+                ChunkError::RestorationNotComplete,
+            ));
+        }
+
+        // get the latest version of the root node
+        self.merk
+            .load_base_root(None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>);
+
+        // if height values are wrong, rewrite heights
+        if self.verify_height().is_err() {
+            self.rewrite_heights();
+            // update the root node after height rewrite
+            self.merk
+                .load_base_root(None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>);
+        }
+
+        if self.merk.verify().0.len() != 0 {
+            return Err(Error::ChunkRestoringError(ChunkError::InternalError(
+                "restored tree invalid",
+            )));
+        }
+
+        Ok(self.merk)
+    }
+
+    /// Verify that the child heights of the merk tree links correctly
+    /// represent the tree
+    fn verify_height(&self) -> Result<(), Error> {
+        let tree = self.merk.tree.take();
+        let height_verification_result = if let Some(tree) = &tree {
+            self.verify_tree_height(&tree, tree.height())
+        } else {
+            Ok(())
+        };
+        self.merk.tree.set(tree);
+        height_verification_result
+    }
+
+    fn verify_tree_height(&self, tree: &TreeNode, parent_height: u8) -> Result<(), Error> {
+        let (left_height, right_height) = tree.child_heights();
+
+        if (left_height.abs_diff(right_height)) > 1 {
+            return Err(Error::CorruptedState(
+                "invalid child heights, difference greater than 1 for AVL tree",
+            ));
+        }
+
+        let max_child_height = left_height.max(right_height);
+        if parent_height <= max_child_height || parent_height - max_child_height != 1 {
+            return Err(Error::CorruptedState(
+                "invalid child heights, parent height is not 1 more than max child height",
+            ));
+        }
+
+        let left_link = tree.link(LEFT);
+        let right_link = tree.link(RIGHT);
+
+        if (left_height == 0 && left_link.is_some()) || (right_height == 0 && right_link.is_some())
+        {
+            return Err(Error::CorruptedState(
+                "invalid child heights, node has child height 0 but has a child",
+            ));
+        }
+
+        if let Some(link) = left_link {
+            let left_tree = link.tree();
+            if left_tree.is_none() {
+                let left_tree = TreeNode::get(
+                    &self.merk.storage,
+                    link.key().to_vec(),
+                    None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
+                )
+                .unwrap()?
+                .ok_or(Error::CorruptedState("link points to non-existent node"))?;
+                self.verify_tree_height(&left_tree, left_height)?;
+            } else {
+                self.verify_tree_height(left_tree.unwrap(), left_height)?;
+            }
+        }
+
+        if let Some(link) = right_link {
+            let right_tree = link.tree();
+            if right_tree.is_none() {
+                let right_tree = TreeNode::get(
+                    &self.merk.storage,
+                    link.key().to_vec(),
+                    None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
+                )
+                .unwrap()?
+ .ok_or(Error::CorruptedState("link points to non-existent node"))?; + self.verify_tree_height(&right_tree, right_height)?; + } else { + self.verify_tree_height(right_tree.unwrap(), right_height)?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use grovedb_path::SubtreePath; + use grovedb_storage::{ + rocksdb_storage::{ + test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext, + PrefixedRocksDbStorageContext, + }, + RawIterator, Storage, + }; + + use super::*; + use crate::{ + execute_proof, + merk::chunks::ChunkProducer, + proofs::{ + chunk::{ + chunk::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, + error::ChunkError::InvalidChunkProof, + }, + Query, + }, + test_utils::{make_batch_seq, TempMerk}, + Error::ChunkRestoringError, + KVIterator, Merk, PanicSource, + }; + + #[test] + fn test_chunk_verification_non_avl_tree() { + let non_avl_tree_proof = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Parent, + ]; + assert!(Restorer::::verify_chunk( + non_avl_tree_proof, + &[0; 32] + ) + .is_err()); + } + + #[test] + fn test_chunk_verification_only_kv_feature_and_hash() { + // should not accept kv + let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvhash + let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvdigest + let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvvaluehash + let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvrefvaluehash + let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + } + + fn get_node_hash(node: Node) -> Result { + match node { + Node::Hash(hash) => Ok(hash), + _ => Err("expected node hash".to_string()), + } + } + + #[test] + fn test_process_chunk_correct_chunk_id_map() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut merk_tree = merk.tree.take().expect("should have 
inner tree"); + merk.tree.set(Some(merk_tree.clone())); + let mut tree_walker = RefWalker::new(&mut merk_tree, PanicSource {}); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + // initial restorer state should contain just the root hash of the source merk + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // generate first chunk + let (chunk, _) = chunk_producer.chunk_with_index(1).unwrap(); + // apply first chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk successfully"); + assert_eq!(new_chunk_ids.len(), 4); + + // after first chunk application + // the chunk_map should contain 4 items + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + // assert all the chunk hash values + assert_eq!( + restorer.chunk_id_to_root_hash.get("11"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("10"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("01"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("00"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])).unwrap()) + .as_ref() + ); + + // generate second chunk + let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 3); + assert_eq!(restorer.chunk_id_to_root_hash.get("11"), None); + + // let's try to apply the second chunk again, should not work + let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); + // apply second chunk + let chunk_process_result = + restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk); + assert_eq!(chunk_process_result.is_err(), true); + assert!(matches!( + chunk_process_result, + Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk)) + )); + + // next let's get a random but expected chunk and work with that e.g. chunk 4 + // but let's apply it to the wrong place + let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); + let chunk_process_result = + restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk); + assert_eq!(chunk_process_result.is_err(), true); + assert!(matches!( + chunk_process_result, + Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + .. 
+            )))
+        ));
+
+        // correctly apply chunk 5
+        let (chunk, _) = chunk_producer.chunk_with_index(5).unwrap();
+        let new_chunk_ids = restorer
+            .process_chunk(traversal_instruction_as_string(&vec![RIGHT, RIGHT]), chunk)
+            .unwrap();
+        assert_eq!(new_chunk_ids.len(), 0);
+        // chunk_map should have 1 less element
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 2);
+        assert_eq!(restorer.chunk_id_to_root_hash.get("00"), None);
+
+        // correctly apply chunk 3
+        let (chunk, _) = chunk_producer.chunk_with_index(3).unwrap();
+        let new_chunk_ids = restorer
+            .process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk)
+            .unwrap();
+        assert_eq!(new_chunk_ids.len(), 0);
+        // chunk_map should have 1 less element
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 1);
+        assert_eq!(restorer.chunk_id_to_root_hash.get("10"), None);
+
+        // correctly apply chunk 4
+        let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap();
+        let new_chunk_ids = restorer
+            .process_chunk(traversal_instruction_as_string(&vec![RIGHT, LEFT]), chunk)
+            .unwrap();
+        assert_eq!(new_chunk_ids.len(), 0);
+        // chunk_map should have 1 less element
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 0);
+        assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None);
+
+        // finalize merk
+        let restored_merk = restorer.finalize().expect("should finalize successfully");
+
+        assert_eq!(
+            restored_merk.root_hash().unwrap(),
+            merk.root_hash().unwrap()
+        );
+    }
+
+    fn assert_raw_db_entries_eq(
+        restored: &Merk<PrefixedRocksDbImmediateStorageContext>,
+        original: &Merk<PrefixedRocksDbImmediateStorageContext>,
+        length: usize,
+    ) {
+        assert_eq!(restored.root_hash().unwrap(), original.root_hash().unwrap());
+
+        let mut original_entries = original.storage.raw_iter();
+        let mut restored_entries = restored.storage.raw_iter();
+        original_entries.seek_to_first().unwrap();
+        restored_entries.seek_to_first().unwrap();
+
+        let mut i = 0;
+        loop {
+            assert_eq!(
+                restored_entries.valid().unwrap(),
+                original_entries.valid().unwrap()
+            );
+            if !restored_entries.valid().unwrap() {
+                break;
+            }
+
+            assert_eq!(restored_entries.key(), original_entries.key());
+            assert_eq!(restored_entries.value(), original_entries.value());
+
+            restored_entries.next().unwrap();
+            original_entries.next().unwrap();
+
+            i += 1;
+        }
+
+        assert_eq!(i, length);
+    }
+
+    // Builds a source merk with batch_size number of elements,
+    // attempts restoration on some empty merk,
+    // and verifies that restoration was performed correctly.
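+    //
+    // Chunk ids here are traversal-instruction strings: "" addresses the
+    // root chunk and each further character walks one level down, with '1'
+    // for LEFT and '0' for RIGHT (matching the "11".."00" ids asserted in
+    // the test above).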
+ fn test_restoration_single_chunk_strategy(batch_size: u64) { + // build the source merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut source_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let (chunk, next_chunk_id) = chunk_producer + .chunk(chunk_id.as_str()) + .expect("should get chunk"); + restorer + .process_chunk(chunk_id.to_string(), chunk) + .expect("should process chunk successfully"); + chunk_id_opt = next_chunk_id; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + source_merk.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + + assert_raw_db_entries_eq(&restored_merk, &source_merk, batch_size as usize); + } + + #[test] + fn restore_single_chunk_20() { + test_restoration_single_chunk_strategy(20); + } + + #[test] + fn restore_single_chunk_1000() { + test_restoration_single_chunk_strategy(1000); + } + + #[test] + fn test_process_multi_chunk_no_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // generate multi chunk from root with no limit + let chunk = chunk_producer + 
.multi_chunk_with_limit("", None) + .expect("should generate multichunk"); + + assert_eq!(chunk.chunk.len(), 2); + assert_eq!(chunk.next_index, None); + assert_eq!(chunk.remaining_limit, None); + + let next_ids = restorer + .process_multi_chunk(chunk.chunk) + .expect("should process chunk"); + // should have replicated all chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_no_limit_but_non_root() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // generate multi chunk from the 2nd chunk with no limit + let multi_chunk = chunk_producer + .multi_chunk_with_limit_and_index(next_chunk_index.unwrap(), None) + .unwrap(); + // tree of height 4 has 5 chunks + // we have restored the first leaving 4 chunks + // each chunk has an extra chunk id, since they are disjoint + // hence the size of the multi chunk should be 8 + assert_eq!(multi_chunk.chunk.len(), 8); + let new_chunk_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_with_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + 
None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
+        )
+        .unwrap()
+        .unwrap();
+
+        // restorer root hash should be empty
+        assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]);
+
+        // at the start both merks should have different root hash values
+        assert_ne!(
+            merk.root_hash().unwrap(),
+            restoration_merk.root_hash().unwrap()
+        );
+
+        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
+        let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap());
+
+        // build a multi chunk with a limit of 600
+        let multi_chunk = chunk_producer
+            .multi_chunk_with_limit("", Some(600))
+            .unwrap();
+        // should only contain the first chunk
+        assert_eq!(multi_chunk.chunk.len(), 2);
+        // should point to chunk 2
+        assert_eq!(multi_chunk.next_index, Some("11".to_string()));
+        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
+        assert_eq!(next_ids.len(), 4);
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 4);
+        assert_eq!(restorer.parent_keys.len(), 4);
+
+        // subsequent chunks are of size 321
+        // with a limit just above 642 we should get 2 chunks (2 and 3)
+        // they are disjoint, so the multi chunk len should be 4
+        let multi_chunk = chunk_producer
+            .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645))
+            .unwrap();
+        assert_eq!(multi_chunk.chunk.len(), 4);
+        assert_eq!(multi_chunk.next_index, Some("01".to_string()));
+        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
+        // chunks 2 and 3 are leaf chunks
+        assert_eq!(next_ids.len(), 0);
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 2);
+        assert_eq!(restorer.parent_keys.len(), 2);
+
+        // get the last 2 chunks
+        let multi_chunk = chunk_producer
+            .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645))
+            .unwrap();
+        assert_eq!(multi_chunk.chunk.len(), 4);
+        assert_eq!(multi_chunk.next_index, None);
+        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
+        // chunks 4 and 5 are also leaf chunks
+        assert_eq!(next_ids.len(), 0);
+        assert_eq!(restorer.chunk_id_to_root_hash.len(), 0);
+        assert_eq!(restorer.parent_keys.len(), 0);
+
+        // finalize merk
+        let restored_merk = restorer.finalize().unwrap();
+
+        // compare root hash values
+        assert_eq!(
+            restored_merk.root_hash().unwrap(),
+            merk.root_hash().unwrap()
+        );
+    }
+
+    // Builds a source merk with batch_size number of elements,
+    // attempts restoration on some empty merk using multi chunks,
+    // and verifies that restoration was performed correctly.
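+    //
+    // Note on `limit`: it bounds the encoded byte size of each multi chunk,
+    // as exercised above (600 fits only the first chunk, 645 fits two
+    // ~321-byte chunks). A limit too small to fit even a single chunk cannot
+    // make progress; see the `should_panic` tiny-limit test below.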
+ fn test_restoration_multi_chunk_strategy(batch_size: u64, limit: Option) { + // build the source merk + let mut source_merk = TempMerk::new(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let multi_chunk = chunk_producer + .multi_chunk_with_limit(chunk_id.as_str(), limit.clone()) + .expect("should get chunk"); + restorer + .process_multi_chunk(multi_chunk.chunk) + .expect("should process chunk successfully"); + chunk_id_opt = multi_chunk.next_index; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + source_merk.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + } + + #[test] + fn restore_multi_chunk_20_no_limit() { + test_restoration_multi_chunk_strategy(20, None); + } + + #[test] + #[should_panic] + fn restore_multi_chunk_20_tiny_limit() { + test_restoration_multi_chunk_strategy(20, Some(1)); + } + + #[test] + fn restore_multi_chunk_20_limit() { + test_restoration_multi_chunk_strategy(20, Some(1200)); + } + + #[test] + fn restore_multi_chunk_10000_limit() { + test_restoration_multi_chunk_strategy(10000, Some(1200)); + } + + #[test] + fn test_restoration_interruption() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = 
chunk_producer.chunk_with_index(1).unwrap(); + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // store old state for later reference + let old_chunk_id_to_root_hash = restorer.chunk_id_to_root_hash.clone(); + let old_parent_keys = restorer.parent_keys.clone(); + + // drop the restorer and the restoration merk + drop(restorer); + // open the restoration merk again and build a restorer from it + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + // assert the state of the restorer + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!(restorer.parent_keys.len(), 0); + + // recover state + let recovery_attempt = restorer.attempt_state_recovery(); + assert_eq!(recovery_attempt.is_ok(), true); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // assert equality to old state + assert_eq!(old_chunk_id_to_root_hash, restorer.chunk_id_to_root_hash); + assert_eq!(old_parent_keys, restorer.parent_keys); + } +} diff --git a/merk/src/merk/restore2.rs b/merk/src/merk/restore2.rs deleted file mode 100644 index b8dfe813..00000000 --- a/merk/src/merk/restore2.rs +++ /dev/null @@ -1,1249 +0,0 @@ -// MIT LICENSE -// -// Copyright (c) 2021 Dash Core Group -// -// Permission is hereby granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// shall be included in all copies or substantial portions -// of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. - -//! Provides `Restorer`, which can create a replica of a Merk instance by -//! receiving chunk proofs. 
- -use std::collections::BTreeMap; - -use grovedb_storage::{Batch, StorageContext}; - -use crate::{ - merk, - merk::MerkSource, - proofs::{ - chunk::{ - chunk2::{LEFT, RIGHT}, - chunk_op::ChunkOp, - error::{ChunkError, ChunkError::InternalError}, - util::{ - string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, - }, - }, - tree::{execute, Child, Tree as ProofTree}, - Node, Op, - }, - tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, - CryptoHash, Error, - Error::{CostsError, EdError, StorageError}, - Link, Merk, - TreeFeatureType::{BasicMerkNode, SummedMerkNode}, -}; - -/// Restorer handles verification of chunks and replication of Merk trees. -/// Chunks can be processed randomly as long as their parent has been processed -/// already. -pub struct Restorer { - merk: Merk, - chunk_id_to_root_hash: BTreeMap, - // this is used to keep track of parents whose links need to be rewritten - parent_keys: BTreeMap>, -} - -impl<'db, S: StorageContext<'db>> Restorer { - /// Initializes a new chunk restorer with the expected root hash for the - /// first chunk - pub fn new(merk: Merk, expected_root_hash: CryptoHash) -> Self { - let mut chunk_id_to_root_hash = BTreeMap::new(); - chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash); - - Self { - merk, - chunk_id_to_root_hash, - parent_keys: BTreeMap::new(), - } - } - - // TODO: consider converting chunk id to a vec - /// Processes a chunk at some chunk id, returns the chunks id's of chunks - /// that can be requested - pub fn process_chunk( - &mut self, - chunk_id: String, - chunk: Vec, - ) -> Result, Error> { - let expected_root_hash = self - .chunk_id_to_root_hash - .get(&chunk_id) - .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?; - - let chunk_tree = Self::verify_chunk(chunk, expected_root_hash)?; - - let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?; - - if root_traversal_instruction.is_empty() { - self.merk.set_base_root_key(Some(chunk_tree.key().to_vec())); - } else { - // every non root chunk has some associated parent with an placeholder link - // here we update the placeholder link to represent the true data - self.rewrite_parent_link(&chunk_id, &root_traversal_instruction, &chunk_tree)?; - } - - // next up, we need to write the chunk and build the map again - let chunk_write_result = self.write_chunk(chunk_tree, &mut root_traversal_instruction); - if chunk_write_result.is_ok() { - // if we were able to successfully write the chunk, we can remove - // the chunk expected root hash from our chunk id map - self.chunk_id_to_root_hash.remove(&chunk_id); - } - - chunk_write_result - } - - /// Process multi chunks (space optimized chunk proofs that can contain - /// multiple singluar chunks) - pub fn process_multi_chunk(&mut self, multi_chunk: Vec) -> Result, Error> { - let mut expect_chunk_id = true; - let mut chunk_ids = vec![]; - let mut current_chunk_id: String = "".to_string(); - - for chunk_op in multi_chunk { - if (matches!(chunk_op, ChunkOp::ChunkId(..)) && !expect_chunk_id) - || (matches!(chunk_op, ChunkOp::Chunk(..)) && expect_chunk_id) - { - return Err(Error::ChunkRestoringError(ChunkError::InvalidMultiChunk( - "invalid multi chunk ordering", - ))); - } - match chunk_op { - ChunkOp::ChunkId(instructions) => { - current_chunk_id = traversal_instruction_as_string(&instructions); - } - ChunkOp::Chunk(chunk) => { - // TODO: remove clone - let next_chunk_ids = self.process_chunk(current_chunk_id.clone(), chunk)?; - 
chunk_ids.extend(next_chunk_ids); - } - } - expect_chunk_id = !expect_chunk_id; - } - Ok(chunk_ids) - } - - /// Verifies the structure of a chunk and ensures the chunk matches the - /// expected root hash - fn verify_chunk(chunk: Vec, expected_root_hash: &CryptoHash) -> Result { - let chunk_len = chunk.len(); - let mut kv_count = 0; - let mut hash_count = 0; - - // build tree from ops - // ensure only made of KvValueFeatureType and Hash nodes and count them - let tree = execute(chunk.clone().into_iter().map(Ok), false, |node| { - if matches!(node, Node::KVValueHashFeatureType(..)) { - kv_count += 1; - Ok(()) - } else if matches!(node, Node::Hash(..)) { - hash_count += 1; - Ok(()) - } else { - Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( - "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", - ))) - } - }) - .unwrap()?; - - // chunk len must be exactly equal to the kv_count + hash_count + - // parent_branch_count + child_branch_count - debug_assert_eq!(chunk_len, ((kv_count + hash_count) * 2) - 1); - - // chunk structure verified, next verify root hash - if &tree.hash().unwrap() != expected_root_hash { - return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( - "chunk doesn't match expected root hash", - ))); - } - - Ok(tree) - } - - /// Write the verified chunk to storage - fn write_chunk( - &mut self, - chunk_tree: ProofTree, - traversal_instruction: &mut Vec, - ) -> Result, Error> { - // this contains all the elements we want to write to storage - let mut batch = self.merk.storage.new_batch(); - let mut new_chunk_ids = Vec::new(); - - chunk_tree.visit_refs_track_traversal_and_parent( - traversal_instruction, - None, - &mut |proof_node, node_traversal_instruction, parent_key| { - match &proof_node.node { - Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => { - // build tree from node value - let mut tree = TreeNode::new_with_value_hash( - key.clone(), - value.clone(), - value_hash.clone(), - *feature_type, - ) - .unwrap(); - - // update tree links - *tree.slot_mut(LEFT) = proof_node.left.as_ref().map(Child::as_link); - *tree.slot_mut(RIGHT) = proof_node.right.as_ref().map(Child::as_link); - - // encode the node and add it to the batch - let bytes = tree.encode(); - - batch.put(key, &bytes, None, None).map_err(CostsError) - } - Node::Hash(hash) => { - // the node hash points to the root of another chunk - // we get the chunk id and add the hash to restorer state - let chunk_id = traversal_instruction_as_string(node_traversal_instruction); - new_chunk_ids.push(chunk_id.clone()); - self.chunk_id_to_root_hash - .insert(chunk_id.clone(), hash.clone()); - // TODO: handle unwrap - self.parent_keys - .insert(chunk_id, parent_key.unwrap().to_owned()); - Ok(()) - } - _ => { - // we do nothing for other node types - // technically verify chunk will be called before this - // as such this should be be reached - Ok(()) - } - } - }, - )?; - - // write the batch - self.merk - .storage - .commit_batch(batch) - .unwrap() - .map_err(StorageError)?; - - Ok(new_chunk_ids) - } - - /// When we process truncated chunks, the parents of Node::Hash have invalid - /// placeholder for links. - /// When we get the actual chunk associated with the Node::Hash, - /// we need to update the parent link to reflect the correct data. 
- fn rewrite_parent_link( - &mut self, - chunk_id: &str, - traversal_instruction: &[bool], - chunk_tree: &ProofTree, - ) -> Result<(), Error> { - let parent_key = self - .parent_keys - .get(chunk_id) - .ok_or(Error::ChunkRestoringError(InternalError( - "after successful chunk verification parent key should exist", - )))?; - - let mut parent = merk::fetch_node( - &self.merk.storage, - parent_key.as_slice(), - None::<&fn(&[u8]) -> Option>, - )? - .ok_or(Error::ChunkRestoringError(InternalError( - "cannot find expected parent in memory, most likely state corruption issue", - )))?; - - let is_left = traversal_instruction - .last() - .expect("rewrite is only called when traversal_instruction is not empty"); - - let updated_key = chunk_tree.key(); - let updated_sum = chunk_tree.sum(); - - if let Some(Link::Reference { key, sum, .. }) = parent.link_mut(is_left.clone()) { - *key = updated_key.to_vec(); - *sum = updated_sum; - } - - let parent_bytes = parent.encode(); - self.merk - .storage - .put(parent_key, &parent_bytes, None, None) - .unwrap() - .map_err(StorageError)?; - - self.parent_keys - .remove(chunk_id) - .expect("confirmed parent key exists above"); - - Ok(()) - } - - /// Each nodes height is not added to state as such the producer could lie - /// about the height values after replication we need to verify the - /// heights and if invalid recompute the correct values - fn rewrite_heights(&mut self) -> Result<(), Error> { - fn rewrite_child_heights<'s, 'db, S: StorageContext<'db>>( - mut walker: RefWalker>, - batch: &mut >::Batch, - ) -> Result<(u8, u8), Error> { - // TODO: remove unwrap - let mut cloned_node = TreeNode::decode( - walker.tree().key().to_vec(), - walker.tree().encode().as_slice(), - None::<&fn(&[u8]) -> Option>, - ) - .unwrap(); - - let mut left_height = 0; - let mut right_height = 0; - - if let Some(left_walker) = walker - .walk(LEFT, None::<&fn(&[u8]) -> Option>) - .unwrap()? - { - let left_child_heights = rewrite_child_heights(left_walker, batch)?; - left_height = left_child_heights.0.max(left_child_heights.1) + 1; - *cloned_node.link_mut(LEFT).unwrap().child_heights_mut() = left_child_heights; - } - - if let Some(right_walker) = walker - .walk(RIGHT, None::<&fn(&[u8]) -> Option>) - .unwrap()? - { - let right_child_heights = rewrite_child_heights(right_walker, batch)?; - right_height = right_child_heights.0.max(right_child_heights.1) + 1; - *cloned_node.link_mut(RIGHT).unwrap().child_heights_mut() = right_child_heights; - } - - let bytes = cloned_node.encode(); - batch - .put(walker.tree().key(), &bytes, None, None) - .map_err(CostsError)?; - - return Ok((left_height, right_height)); - } - - let mut batch = self.merk.storage.new_batch(); - // TODO: deal with unwrap - let mut tree = self.merk.tree.take().unwrap(); - let mut walker = RefWalker::new(&mut tree, self.merk.source()); - - rewrite_child_heights(walker, &mut batch)?; - - self.merk.tree.set(Some(tree)); - - self.merk - .storage - .commit_batch(batch) - .unwrap() - .map_err(StorageError) - } - - /// Rebuild restoration state from partial storage state - fn attempt_state_recovery(&mut self) -> Result<(), Error> { - // TODO: think about the return type some more - let (bad_link_map, parent_keys) = self.merk.verify(); - if !bad_link_map.is_empty() { - self.chunk_id_to_root_hash = bad_link_map; - self.parent_keys = parent_keys; - } - - Ok(()) - } - - /// Consumes the `Restorer` and returns a newly created, fully populated - /// Merk instance. 
-    /// processing all chunks.
-    pub fn finalize(mut self) -> Result<Merk<S>, Error> {
-        // ensure all chunks have been processed
-        if self.chunk_id_to_root_hash.len() != 0 || self.parent_keys.len() != 0 {
-            return Err(Error::ChunkRestoringError(
-                ChunkError::RestorationNotComplete,
-            ));
-        }
-
-        // get the latest version of the root node
-        self.merk
-            .load_base_root(None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>);
-
-        // if the height values are wrong, rewrite the heights
-        if self.verify_height().is_err() {
-            self.rewrite_heights();
-            // update the root node after height rewrite
-            self.merk
-                .load_base_root(None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>);
-        }
-
-        if self.merk.verify().0.len() != 0 {
-            return Err(Error::ChunkRestoringError(ChunkError::InternalError(
-                "restored tree invalid",
-            )));
-        }
-
-        Ok(self.merk)
-    }
-
-    /// Verify that the child heights of the merk tree links correctly represent
-    /// the tree
-    fn verify_height(&self) -> Result<(), Error> {
-        let tree = self.merk.tree.take();
-        let height_verification_result = if let Some(tree) = &tree {
-            self.verify_tree_height(&tree, tree.height())
-        } else {
-            Ok(())
-        };
-        self.merk.tree.set(tree);
-        height_verification_result
-    }
-
-    fn verify_tree_height(&self, tree: &TreeNode, parent_height: u8) -> Result<(), Error> {
-        let (left_height, right_height) = tree.child_heights();
-
-        if (left_height.abs_diff(right_height)) > 1 {
-            return Err(Error::CorruptedState(
-                "invalid child heights, difference greater than 1 for AVL tree",
-            ));
-        }
-
-        let max_child_height = left_height.max(right_height);
-        if parent_height <= max_child_height || parent_height - max_child_height != 1 {
-            return Err(Error::CorruptedState(
-                "invalid child heights, parent height is not 1 more than max child height",
-            ));
-        }
-
-        let left_link = tree.link(LEFT);
-        let right_link = tree.link(RIGHT);
-
-        if (left_height == 0 && left_link.is_some()) || (right_height == 0 && right_link.is_some())
-        {
-            return Err(Error::CorruptedState(
-                "invalid child heights, node has child height 0 but has a child",
-            ));
-        }
-
-        if let Some(link) = left_link {
-            let left_tree = link.tree();
-            if left_tree.is_none() {
-                let left_tree = TreeNode::get(
-                    &self.merk.storage,
-                    link.key().to_vec(),
-                    None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
-                )
-                .unwrap()?
-                .ok_or(Error::CorruptedState("link points to non-existent node"))?;
-                self.verify_tree_height(&left_tree, left_height)?;
-            } else {
-                self.verify_tree_height(left_tree.unwrap(), left_height)?;
-            }
-        }
-
-        if let Some(link) = right_link {
-            let right_tree = link.tree();
-            if right_tree.is_none() {
-                let right_tree = TreeNode::get(
-                    &self.merk.storage,
-                    link.key().to_vec(),
-                    None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
-                )
-                .unwrap()?
-                .ok_or(Error::CorruptedState("link points to non-existent node"))?;
-                self.verify_tree_height(&right_tree, right_height)?;
-            } else {
-                self.verify_tree_height(right_tree.unwrap(), right_height)?;
-            }
-        }
-
-        Ok(())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use grovedb_path::SubtreePath;
-    use grovedb_storage::{
-        rocksdb_storage::{
-            test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext,
-            PrefixedRocksDbStorageContext,
-        },
-        RawIterator, Storage,
-    };
-
-    use super::*;
-    use crate::{
-        execute_proof,
-        merk::chunks2::ChunkProducer,
-        proofs::{
-            chunk::{
-                chunk2::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk,
-                error::ChunkError::InvalidChunkProof,
-            },
-            Query,
-        },
-        test_utils::{make_batch_seq, TempMerk},
-        Error::ChunkRestoringError,
-        KVIterator, Merk, PanicSource,
-    };
-
-    #[test]
-    fn test_chunk_verification_non_avl_tree() {
-        let non_avl_tree_proof = vec![
-            Op::Push(Node::KV(vec![1], vec![1])),
-            Op::Push(Node::KV(vec![2], vec![2])),
-            Op::Parent,
-            Op::Push(Node::KV(vec![3], vec![3])),
-            Op::Parent,
-        ];
-        assert!(Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(
-            non_avl_tree_proof,
-            &[0; 32]
-        )
-        .is_err());
-    }
-
-    #[test]
-    fn test_chunk_verification_only_kv_feature_and_hash() {
-        // should not accept kv
-        let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))];
-        let verification_result =
-            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
-        assert!(matches!(
-            verification_result,
-            Err(ChunkRestoringError(InvalidChunkProof(
-                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
-            )))
-        ));
-
-        // should not accept kvhash
-        let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))];
-        let verification_result =
-            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
-        assert!(matches!(
-            verification_result,
-            Err(ChunkRestoringError(InvalidChunkProof(
-                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
-            )))
-        ));
-
-        // should not accept kvdigest
-        let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))];
-        let verification_result =
-            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
-        assert!(matches!(
-            verification_result,
-            Err(ChunkRestoringError(InvalidChunkProof(
-                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
-            )))
-        ));
-
-        // should not accept kvvaluehash
-        let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))];
-        let verification_result =
-            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
-        assert!(matches!(
-            verification_result,
-            Err(ChunkRestoringError(InvalidChunkProof(
-                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
-            )))
-        ));
-
-        // should not accept kvrefvaluehash
-        let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))];
-        let verification_result =
-            Restorer::<PrefixedRocksDbStorageContext>::verify_chunk(invalid_chunk_proof, &[0; 32]);
-        assert!(matches!(
-            verification_result,
-            Err(ChunkRestoringError(InvalidChunkProof(
-                "expected chunk proof to contain only kvvaluefeaturetype or hash nodes",
-            )))
-        ));
-    }
-
-    fn get_node_hash(node: Node) -> Result<CryptoHash, String> {
-        match node {
-            Node::Hash(hash) => Ok(hash),
-            _ => Err("expected node hash".to_string()),
-        }
-    }
-
-    #[test]
-    fn test_process_chunk_correct_chunk_id_map() {
-        let mut merk = TempMerk::new();
-        let batch = make_batch_seq(0..15);
-        merk.apply::<_, Vec<_>>(&batch, &[], None)
-            .unwrap()
-            .expect("apply failed");
-        assert_eq!(merk.height(), Some(4));
-
-        let mut merk_tree = merk.tree.take().expect("should have inner tree");
-        merk.tree.set(Some(merk_tree.clone()));
-        let mut tree_walker = RefWalker::new(&mut merk_tree, PanicSource {});
-
-        let storage = TempStorage::new();
-        let tx = storage.start_transaction();
-        let mut restoration_merk = Merk::open_base(
-            storage
-                .get_immediate_storage_context(SubtreePath::empty(), &tx)
-                .unwrap(),
-            false,
-            None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
-        )
-        .unwrap()
-        .unwrap();
-
-        // restorer root hash should be empty
-        assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]);
-
-        // at the start both merks should have different root hash values
-        assert_ne!(
-            merk.root_hash().unwrap(),
-            restoration_merk.root_hash().unwrap()
-        );
-
-        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
-        let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap());
-
-        // initial restorer state should contain just the root hash of the source merk
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 1);
-        assert_eq!(
-            restorer.chunk_id_to_root_hash.get(""),
-            Some(merk.root_hash().unwrap()).as_ref()
-        );
-
-        // generate first chunk
-        let (chunk, _) = chunk_producer.chunk_with_index(1).unwrap();
-        // apply first chunk
-        let new_chunk_ids = restorer
-            .process_chunk(traversal_instruction_as_string(&vec![]), chunk)
-            .expect("should process chunk successfully");
-        assert_eq!(new_chunk_ids.len(), 4);
-
-        // after first chunk application
-        // the chunk_map should contain 4 items
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 4);
-        // assert all the chunk hash values
-        assert_eq!(
-            restorer.chunk_id_to_root_hash.get("11"),
-            Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])).unwrap())
-                .as_ref()
-        );
-        assert_eq!(
-            restorer.chunk_id_to_root_hash.get("10"),
-            Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])).unwrap())
-                .as_ref()
-        );
-        assert_eq!(
-            restorer.chunk_id_to_root_hash.get("01"),
-            Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])).unwrap())
-                .as_ref()
-        );
-        assert_eq!(
-            restorer.chunk_id_to_root_hash.get("00"),
-            Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])).unwrap())
-                .as_ref()
-        );
-
-        // generate second chunk
-        let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap();
-        // apply second chunk
-        let new_chunk_ids = restorer
-            .process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk)
-            .unwrap();
-        assert_eq!(new_chunk_ids.len(), 0);
-        // chunk_map should have 1 less element
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 3);
-        assert_eq!(restorer.chunk_id_to_root_hash.get("11"), None);
-
-        // let's try to apply the second chunk again, should not work
-        let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap();
-        // apply second chunk
-        let chunk_process_result =
-            restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk);
-        assert_eq!(chunk_process_result.is_err(), true);
-        assert!(matches!(
-            chunk_process_result,
-            Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))
-        ));
-
-        // next let's get a random but expected chunk and work with that e.g. chunk 4
-        // but let's apply it to the wrong place
-        let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap();
-        let chunk_process_result =
-            restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk);
-        assert_eq!(chunk_process_result.is_err(), true);
-        assert!(matches!(
-            chunk_process_result,
-            Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof(
-                ..
-            )))
-        ));
-
-        // correctly apply chunk 5
-        let (chunk, _) = chunk_producer.chunk_with_index(5).unwrap();
-        // apply chunk 5
-        let new_chunk_ids = restorer
-            .process_chunk(traversal_instruction_as_string(&vec![RIGHT, RIGHT]), chunk)
-            .unwrap();
-        assert_eq!(new_chunk_ids.len(), 0);
-        // chunk_map should have 1 less element
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 2);
-        assert_eq!(restorer.chunk_id_to_root_hash.get("00"), None);
-
-        // correctly apply chunk 3
-        let (chunk, _) = chunk_producer.chunk_with_index(3).unwrap();
-        // apply chunk 3
-        let new_chunk_ids = restorer
-            .process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk)
-            .unwrap();
-        assert_eq!(new_chunk_ids.len(), 0);
-        // chunk_map should have 1 less element
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 1);
-        assert_eq!(restorer.chunk_id_to_root_hash.get("10"), None);
-
-        // correctly apply chunk 4
-        let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap();
-        // apply chunk 4
-        let new_chunk_ids = restorer
-            .process_chunk(traversal_instruction_as_string(&vec![RIGHT, LEFT]), chunk)
-            .unwrap();
-        assert_eq!(new_chunk_ids.len(), 0);
-        // chunk_map should have 1 less element
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 0);
-        assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None);
-
-        // finalize merk
-        let mut restored_merk = restorer.finalize().expect("should finalize successfully");
-
-        assert_eq!(
-            restored_merk.root_hash().unwrap(),
-            merk.root_hash().unwrap()
-        );
-    }
-
-    fn assert_raw_db_entries_eq(
-        restored: &Merk<PrefixedRocksDbImmediateStorageContext>,
-        original: &Merk<PrefixedRocksDbImmediateStorageContext>,
-        length: usize,
-    ) {
-        assert_eq!(restored.root_hash().unwrap(), original.root_hash().unwrap());
-
-        let mut original_entries = original.storage.raw_iter();
-        let mut restored_entries = restored.storage.raw_iter();
-        original_entries.seek_to_first().unwrap();
-        restored_entries.seek_to_first().unwrap();
-
-        let mut i = 0;
-        loop {
-            assert_eq!(
-                restored_entries.valid().unwrap(),
-                original_entries.valid().unwrap()
-            );
-            if !restored_entries.valid().unwrap() {
-                break;
-            }
-
-            assert_eq!(restored_entries.key(), original_entries.key());
-            assert_eq!(restored_entries.value(), original_entries.value());
-
-            restored_entries.next().unwrap();
-            original_entries.next().unwrap();
-
-            i += 1;
-        }
-
-        assert_eq!(i, length);
-    }
-
-    // Builds a source merk with batch_size number of elements
-    // attempts restoration on some empty merk
-    // verifies that restoration was performed correctly.
- fn test_restoration_single_chunk_strategy(batch_size: u64) { - // build the source merk - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut source_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - let batch = make_batch_seq(0..batch_size); - source_merk - .apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - - // build the restoration merk - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // at the start - // restoration merk should have empty root hash - // and source merk should have a different root hash - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - assert_ne!( - source_merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - // instantiate chunk producer and restorer - let mut chunk_producer = - ChunkProducer::new(&source_merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); - - // perform chunk production and processing - let mut chunk_id_opt = Some("".to_string()); - while let Some(chunk_id) = chunk_id_opt { - let (chunk, next_chunk_id) = chunk_producer - .chunk(chunk_id.as_str()) - .expect("should get chunk"); - restorer - .process_chunk(chunk_id.to_string(), chunk) - .expect("should process chunk successfully"); - chunk_id_opt = next_chunk_id; - } - - // after chunk processing we should be able to finalize - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - let restored_merk = restorer.finalize().expect("should finalize"); - - // compare root hash values - assert_eq!( - source_merk.root_hash().unwrap(), - restored_merk.root_hash().unwrap() - ); - - assert_raw_db_entries_eq(&restored_merk, &source_merk, batch_size as usize); - } - - #[test] - fn restore_single_chunk_20() { - test_restoration_single_chunk_strategy(20); - } - - #[test] - fn restore_single_chunk_1000() { - test_restoration_single_chunk_strategy(1000); - } - - #[test] - fn test_process_multi_chunk_no_limit() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // restorer root hash should be empty - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - - // at the start both merks should have different root hash values - assert_ne!( - merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!( - restorer.chunk_id_to_root_hash.get(""), - Some(merk.root_hash().unwrap()).as_ref() - ); - - // generate multi chunk from root with no limit - let chunk = chunk_producer - 
.multi_chunk_with_limit("", None) - .expect("should generate multichunk"); - - assert_eq!(chunk.chunk.len(), 2); - assert_eq!(chunk.next_index, None); - assert_eq!(chunk.remaining_limit, None); - - let next_ids = restorer - .process_multi_chunk(chunk.chunk) - .expect("should process chunk"); - // should have replicated all chunks - assert_eq!(next_ids.len(), 0); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - - let restored_merk = restorer.finalize().expect("should be able to finalize"); - - // compare root hash values - assert_eq!( - restored_merk.root_hash().unwrap(), - merk.root_hash().unwrap() - ); - } - - #[test] - fn test_process_multi_chunk_no_limit_but_non_root() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // restorer root hash should be empty - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - - // at the start both merks should have different root hash values - assert_ne!( - merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!( - restorer.chunk_id_to_root_hash.get(""), - Some(merk.root_hash().unwrap()).as_ref() - ); - - // first restore the first chunk - let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); - let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![]), chunk) - .expect("should process chunk"); - assert_eq!(new_chunk_ids.len(), 4); - assert_eq!(next_chunk_index, Some(2)); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); - assert_eq!(restorer.parent_keys.len(), 4); - - // generate multi chunk from the 2nd chunk with no limit - let multi_chunk = chunk_producer - .multi_chunk_with_limit_and_index(next_chunk_index.unwrap(), None) - .unwrap(); - // tree of height 4 has 5 chunks - // we have restored the first leaving 4 chunks - // each chunk has an extra chunk id, since they are disjoint - // hence the size of the multi chunk should be 8 - assert_eq!(multi_chunk.chunk.len(), 8); - let new_chunk_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); - assert_eq!(new_chunk_ids.len(), 0); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - - let restored_merk = restorer.finalize().expect("should be able to finalize"); - - // compare root hash values - assert_eq!( - restored_merk.root_hash().unwrap(), - merk.root_hash().unwrap() - ); - } - - #[test] - fn test_process_multi_chunk_with_limit() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - 
            None::<&fn(&[u8]) -> Option<ValueDefinedCostType>>,
-        )
-        .unwrap()
-        .unwrap();
-
-        // restorer root hash should be empty
-        assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]);
-
-        // at the start both merks should have different root hash values
-        assert_ne!(
-            merk.root_hash().unwrap(),
-            restoration_merk.root_hash().unwrap()
-        );
-
-        let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer");
-        let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap());
-
-        // build multi chunk with a limit of 600
-        let multi_chunk = chunk_producer
-            .multi_chunk_with_limit("", Some(600))
-            .unwrap();
-        // should only contain the first chunk
-        assert_eq!(multi_chunk.chunk.len(), 2);
-        // should point to chunk 2
-        assert_eq!(multi_chunk.next_index, Some("11".to_string()));
-        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
-        assert_eq!(next_ids.len(), 4);
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 4);
-        assert_eq!(restorer.parent_keys.len(), 4);
-
-        // subsequent chunks are of size 321
-        // with limit just above 642 should get 2 chunks (2 and 3)
-        // disjoint, so multi chunk len should be 4
-        let multi_chunk = chunk_producer
-            .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645))
-            .unwrap();
-        assert_eq!(multi_chunk.chunk.len(), 4);
-        assert_eq!(multi_chunk.next_index, Some("01".to_string()));
-        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
-        // chunks 2 and 3 are leaf chunks
-        assert_eq!(next_ids.len(), 0);
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 2);
-        assert_eq!(restorer.parent_keys.len(), 2);
-
-        // get the last 2 chunks
-        let multi_chunk = chunk_producer
-            .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645))
-            .unwrap();
-        assert_eq!(multi_chunk.chunk.len(), 4);
-        assert_eq!(multi_chunk.next_index, None);
-        let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap();
-        // chunks 4 and 5 are also leaf chunks
-        assert_eq!(next_ids.len(), 0);
-        assert_eq!(restorer.chunk_id_to_root_hash.len(), 0);
-        assert_eq!(restorer.parent_keys.len(), 0);
-
-        // finalize merk
-        let restored_merk = restorer.finalize().unwrap();
-
-        // compare root hash values
-        assert_eq!(
-            restored_merk.root_hash().unwrap(),
-            merk.root_hash().unwrap()
-        );
-    }
-
-    // Builds a source merk with batch_size number of elements
-    // attempts restoration on some empty merk, with multi chunks
-    // verifies that restoration was performed correctly.
- fn test_restoration_multi_chunk_strategy(batch_size: u64, limit: Option) { - // build the source merk - let mut source_merk = TempMerk::new(); - let batch = make_batch_seq(0..batch_size); - source_merk - .apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - - // build the restoration merk - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // at the start - // restoration merk should have empty root hash - // and source merk should have a different root hash - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - assert_ne!( - source_merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - // instantiate chunk producer and restorer - let mut chunk_producer = - ChunkProducer::new(&source_merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); - - // perform chunk production and processing - let mut chunk_id_opt = Some("".to_string()); - while let Some(chunk_id) = chunk_id_opt { - let multi_chunk = chunk_producer - .multi_chunk_with_limit(chunk_id.as_str(), limit.clone()) - .expect("should get chunk"); - restorer - .process_multi_chunk(multi_chunk.chunk) - .expect("should process chunk successfully"); - chunk_id_opt = multi_chunk.next_index; - } - - // after chunk processing we should be able to finalize - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - let restored_merk = restorer.finalize().expect("should finalize"); - - // compare root hash values - assert_eq!( - source_merk.root_hash().unwrap(), - restored_merk.root_hash().unwrap() - ); - } - - #[test] - fn restore_multi_chunk_20_no_limit() { - test_restoration_multi_chunk_strategy(20, None); - } - - #[test] - #[should_panic] - fn restore_multi_chunk_20_tiny_limit() { - test_restoration_multi_chunk_strategy(20, Some(1)); - } - - #[test] - fn restore_multi_chunk_20_limit() { - test_restoration_multi_chunk_strategy(20, Some(1200)); - } - - #[test] - fn restore_multi_chunk_10000_limit() { - test_restoration_multi_chunk_strategy(10000, Some(1200)); - } - - #[test] - fn test_restoration_interruption() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // restorer root hash should be empty - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - - // at the start both merks should have different root hash values - assert_ne!( - merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!( - restorer.chunk_id_to_root_hash.get(""), - Some(merk.root_hash().unwrap()).as_ref() - ); - - // first restore the first chunk - let (chunk, next_chunk_index) = 
chunk_producer.chunk_with_index(1).unwrap(); - let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![]), chunk) - .expect("should process chunk"); - assert_eq!(new_chunk_ids.len(), 4); - assert_eq!(next_chunk_index, Some(2)); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); - assert_eq!(restorer.parent_keys.len(), 4); - - // store old state for later reference - let old_chunk_id_to_root_hash = restorer.chunk_id_to_root_hash.clone(); - let old_parent_keys = restorer.parent_keys.clone(); - - // drop the restorer and the restoration merk - drop(restorer); - // open the restoration merk again and build a restorer from it - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - // assert the state of the restorer - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!(restorer.parent_keys.len(), 0); - - // recover state - let recovery_attempt = restorer.attempt_state_recovery(); - assert_eq!(recovery_attempt.is_ok(), true); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); - assert_eq!(restorer.parent_keys.len(), 4); - - // assert equality to old state - assert_eq!(old_chunk_id_to_root_hash, restorer.chunk_id_to_root_hash); - assert_eq!(old_parent_keys, restorer.parent_keys); - } -} diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 20fa7c05..2ea820ef 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -53,7 +53,7 @@ use crate::{ mod binary_range; #[cfg(feature = "full")] -pub mod chunk2; +pub mod chunk; pub mod chunk_op; pub mod error; #[cfg(feature = "full")] diff --git a/merk/src/proofs/chunk/chunk2.rs b/merk/src/proofs/chunk/chunk.rs similarity index 99% rename from merk/src/proofs/chunk/chunk2.rs rename to merk/src/proofs/chunk/chunk.rs index e556ee1f..5258dffa 100644 --- a/merk/src/proofs/chunk/chunk2.rs +++ b/merk/src/proofs/chunk/chunk.rs @@ -210,7 +210,7 @@ pub mod tests { use crate::{ proofs::{ - chunk::chunk2::{verify_height_proof, LEFT, RIGHT}, + chunk::chunk::{verify_height_proof, LEFT, RIGHT}, tree::execute, Node, Op, Op::Parent, diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 535af055..9402d3d5 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -126,7 +126,7 @@ mod test { use crate::proofs::{ chunk::{ - chunk2::{LEFT, RIGHT}, + chunk::{LEFT, RIGHT}, chunk_op::ChunkOp, }, Node, Op, diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs index a00041fc..986b24c7 100644 --- a/merk/src/proofs/chunk/util.rs +++ b/merk/src/proofs/chunk/util.rs @@ -35,7 +35,7 @@ use std::io::Write; use crate::{proofs::chunk::binary_range::BinaryRange, Error}; use crate::{ proofs::chunk::{ - chunk2::{LEFT, RIGHT}, + chunk::{LEFT, RIGHT}, error::{ChunkError, ChunkError::BadTraversalInstruction}, }, Error::InternalError, @@ -372,7 +372,7 @@ mod test { use byteorder::LE; use super::*; - use crate::proofs::chunk::chunk2::{LEFT, RIGHT}; + use crate::proofs::chunk::chunk::{LEFT, RIGHT}; #[test] fn test_chunk_height_per_layer() { diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index 47f96d2b..b91bd68f 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -44,7 +44,7 @@ use crate::tree::{combine_hash, kv_digest_to_kv_hash, kv_hash, node_hash, value_ #[cfg(any(feature = 
"full", feature = "verify"))] use crate::{error::Error, tree::CryptoHash}; use crate::{ - proofs::chunk::chunk2::{LEFT, RIGHT}, + proofs::chunk::chunk::{LEFT, RIGHT}, Link, TreeFeatureType::SummedMerkNode, }; From cd57d8b21909e35bc755ecff8b2f31efe1a4a59f Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Tue, 3 Oct 2023 10:07:09 +0100 Subject: [PATCH 7/7] clippy fixes --- grovedb/src/batch/mod.rs | 13 +++--- grovedb/src/operations/delete/mod.rs | 10 +---- grovedb/src/tests/mod.rs | 13 +++--- grovedb/src/tests/query_tests.rs | 20 ++++----- grovedb/src/versioning.rs | 2 +- merk/src/merk/chunks.rs | 64 +++++++++++++------------- merk/src/merk/mod.rs | 36 +++++---------- merk/src/merk/restore.rs | 65 ++++++++++++--------------- merk/src/proofs/chunk.rs | 23 ---------- merk/src/proofs/chunk/binary_range.rs | 32 ++++++------- merk/src/proofs/chunk/chunk.rs | 29 ++++++------ merk/src/proofs/chunk/chunk_op.rs | 2 +- merk/src/proofs/chunk/util.rs | 32 ++++++------- merk/src/proofs/tree.rs | 15 +++---- 14 files changed, 147 insertions(+), 209 deletions(-) diff --git a/grovedb/src/batch/mod.rs b/grovedb/src/batch/mod.rs index a3b2d502..70c47619 100644 --- a/grovedb/src/batch/mod.rs +++ b/grovedb/src/batch/mod.rs @@ -2432,8 +2432,8 @@ mod tests { Element::empty_tree(), ), ]; - assert!(matches!( - db.apply_batch( + assert!(db + .apply_batch( ops, Some(BatchApplyOptions { validate_insertion_does_not_override: false, @@ -2446,9 +2446,8 @@ mod tests { }), None ) - .unwrap(), - Ok(_) - )); + .unwrap() + .is_ok()); } #[test] @@ -3489,7 +3488,7 @@ mod tests { elem.clone(), ), ]; - assert!(matches!(db.apply_batch(batch, None, None).unwrap(), Ok(_))); + assert!(db.apply_batch(batch, None, None).unwrap().is_ok()); assert_eq!( db.get([TEST_LEAF].as_ref(), b"key1", None) .unwrap() @@ -3506,7 +3505,7 @@ mod tests { .unwrap() .expect("should generate proof"); let verification_result = GroveDb::verify_query_raw(&proof, &path_query); - assert!(matches!(verification_result, Ok(_))); + assert!(verification_result.is_ok()); // Hit reference limit when you specify max reference hop, lower than actual hop // count diff --git a/grovedb/src/operations/delete/mod.rs b/grovedb/src/operations/delete/mod.rs index 6d7a34d0..350dd2df 100644 --- a/grovedb/src/operations/delete/mod.rs +++ b/grovedb/src/operations/delete/mod.rs @@ -1029,10 +1029,7 @@ mod tests { db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(), Err(Error::PathKeyNotFound(_)) )); - assert!(matches!( - db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(), - Ok(_) - )); + assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok()); } #[test] @@ -1397,10 +1394,7 @@ mod tests { db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(), Err(Error::PathKeyNotFound(_)) )); - assert!(matches!( - db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(), - Ok(_) - )); + assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok()); } #[test] diff --git a/grovedb/src/tests/mod.rs b/grovedb/src/tests/mod.rs index 451b2307..cbb0d195 100644 --- a/grovedb/src/tests/mod.rs +++ b/grovedb/src/tests/mod.rs @@ -465,7 +465,7 @@ fn test_element_with_flags() { let db = make_test_grovedb(); db.insert( - [TEST_LEAF.as_ref()].as_ref(), + [TEST_LEAF].as_ref(), b"key1", Element::empty_tree(), None, @@ -2803,7 +2803,7 @@ fn test_root_hash() { #[test] fn test_get_non_existing_root_leaf() { let db = make_test_grovedb(); - assert!(matches!(db.get(EMPTY_PATH, b"ayy", None).unwrap(), Err(_))); + assert!(db.get(EMPTY_PATH, b"ayy", None).unwrap().is_err()); } #[test] @@ -2830,7 +2830,7 @@ fn 
test_check_subtree_exists_function() { // Empty tree path means root always exist assert!(db - .check_subtree_exists_invalid_path(EMPTY_PATH.into(), None) + .check_subtree_exists_invalid_path(EMPTY_PATH, None) .unwrap() .is_ok()); @@ -2943,17 +2943,14 @@ fn test_storage_wipe() { .expect("cannot insert item"); // retrieve key before wipe - let elem = db - .get(&[TEST_LEAF.as_ref()], b"key", None) - .unwrap() - .unwrap(); + let elem = db.get(&[TEST_LEAF], b"key", None).unwrap().unwrap(); assert_eq!(elem, Element::new_item(b"ayy".to_vec())); // wipe the database db.grove_db.wipe().unwrap(); // retrieve key after wipe - let elem_result = db.get(&[TEST_LEAF.as_ref()], b"key", None).unwrap(); + let elem_result = db.get(&[TEST_LEAF], b"key", None).unwrap(); assert!(elem_result.is_err()); assert!(matches!( elem_result, diff --git a/grovedb/src/tests/query_tests.rs b/grovedb/src/tests/query_tests.rs index 0bb6a1f0..0092b444 100644 --- a/grovedb/src/tests/query_tests.rs +++ b/grovedb/src/tests/query_tests.rs @@ -46,7 +46,7 @@ use crate::{ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { // Insert a couple of subtrees first for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -70,7 +70,7 @@ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { for j in 100u32..150 { let mut j_vec = i_vec.clone(); - j_vec.append(&mut (j as u32).to_be_bytes().to_vec()); + j_vec.append(&mut j.to_be_bytes().to_vec()); db.insert( [TEST_LEAF, i_vec.as_slice(), b"\0"].as_ref(), &j_vec.clone(), @@ -87,7 +87,7 @@ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { // Insert a couple of subtrees first for i in 0u32..10 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -110,7 +110,7 @@ fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); for j in 25u32..50 { - let j_vec = (j as u32).to_be_bytes().to_vec(); + let j_vec = j.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, i_vec.as_slice(), b"a"].as_ref(), &j_vec, @@ -134,7 +134,7 @@ fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); for k in 100u32..110 { - let k_vec = (k as u32).to_be_bytes().to_vec(); + let k_vec = k.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, i_vec.as_slice(), b"a", &j_vec, b"\0"].as_ref(), &k_vec.clone(), @@ -173,7 +173,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); // Insert a couple of subtrees first for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, b"1"].as_ref(), &i_vec, @@ -198,7 +198,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { for j in 100u32..150 { let random_key = rand::thread_rng().gen::<[u8; 32]>(); let mut j_vec = i_vec.clone(); - j_vec.append(&mut (j as u32).to_be_bytes().to_vec()); + j_vec.append(&mut j.to_be_bytes().to_vec()); // We should insert every item to the tree holding items db.insert( @@ -231,7 +231,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { fn populate_tree_for_unique_range_subquery(db: &TempGroveDb) { // Insert a couple of subtrees first for i in 1985u32..2000 { - let 
i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -278,7 +278,7 @@ fn populate_tree_by_reference_for_unique_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, b"1"].as_ref(), &i_vec, @@ -333,7 +333,7 @@ fn populate_tree_for_unique_range_subquery_with_non_unique_null_values(db: &mut .expect("successful subtree insert"); // Insert a couple of subtrees first for i in 100u32..200 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, &[], b"\0"].as_ref(), &i_vec, diff --git a/grovedb/src/versioning.rs b/grovedb/src/versioning.rs index a041b3d8..5a724afc 100644 --- a/grovedb/src/versioning.rs +++ b/grovedb/src/versioning.rs @@ -52,7 +52,7 @@ mod tests { assert_eq!(new_data, [244, 3, 1, 2, 3]); // show that read_version doesn't consume - assert_eq!(read_proof_version(&mut new_data.as_slice()).unwrap(), 500); + assert_eq!(read_proof_version(new_data.as_slice()).unwrap(), 500); assert_eq!(new_data, [244, 3, 1, 2, 3]); // show that we consume the version number and return the remaining vector diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 51521ced..1b014365 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -26,35 +26,28 @@ // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. -use std::{ - cmp::max, - collections::{LinkedList, VecDeque}, - path::Iter, -}; +use std::collections::VecDeque; use ed::Encode; -use grovedb_costs::{CostResult, CostsExt, OperationCost}; use grovedb_storage::StorageContext; -use integer_encoding::VarInt; use crate::{ error::Error, proofs::{ chunk::{ chunk_op::ChunkOp, - error::{ChunkError, ChunkError::InternalError}, + error::ChunkError, util::{ chunk_height, chunk_id_from_traversal_instruction, chunk_id_from_traversal_instruction_with_recovery, generate_traversal_instruction, generate_traversal_instruction_as_string, number_of_chunks, - string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, + string_as_traversal_instruction, }, }, Node, Op, }, - tree::RefWalker, Error::ChunkingError, - Merk, PanicSource, + Merk, }; /// ChunkProof for replication of a single subtree @@ -221,7 +214,7 @@ where // generate as many subtree chunks as we can // until we have exhausted all or hit a limit restriction - while current_index != None { + while current_index.is_some() { let current_index_traversal_instruction = generate_traversal_instruction( self.height, current_index.expect("confirmed is Some"), @@ -230,7 +223,7 @@ where // factor in the ChunkId encoding length in limit calculations let temp_limit = if let Some(limit) = current_limit { - let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|e| { + let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("cannot get encoding length")) })?; if limit >= chunk_id_op_encoding_len { @@ -297,7 +290,7 @@ where // we first get the chunk at the given index // TODO: use the returned chunk index rather than tracking let (chunk_ops, _) = self.chunk_with_index(chunk_index)?; - chunk_byte_length = chunk_ops.encoding_length().map_err(|e| { + chunk_byte_length = chunk_ops.encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) })?; 
chunk_index += 1; @@ -322,10 +315,10 @@ where let (replacement_chunk, _) = self.chunk_with_index(chunk_index)?; // calculate the new total - let new_total = replacement_chunk.encoding_length().map_err(|e| { + let new_total = replacement_chunk.encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) })? + chunk_byte_length - - chunk[iteration_index].encoding_length().map_err(|e| { + - chunk[iteration_index].encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) })?; @@ -368,7 +361,7 @@ where /// Returns the total number of chunks for the underlying Merk tree. pub fn len(&self) -> usize { - number_of_chunks(self.height as usize) + number_of_chunks(self.height) } /// Gets the next chunk based on the `ChunkProducer`'s internal index state. @@ -390,7 +383,7 @@ where chunk_index .map(|index| generate_traversal_instruction_as_string(self.height, index)) .transpose() - .and_then(|v| Ok((chunk, v))) + .map(|v| (chunk, v)) }), ) } @@ -432,14 +425,19 @@ mod test { use super::*; use crate::{ proofs::{ - chunk::chunk::{ - tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, - LEFT, RIGHT, + chunk::{ + chunk::{ + tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, + LEFT, RIGHT, + }, + util::traversal_instruction_as_string, }, tree::execute, Tree, }, test_utils::{make_batch_seq, TempMerk}, + tree::RefWalker, + PanicSource, }; #[derive(Default)] @@ -455,13 +453,13 @@ mod test { impl NodeCounts { fn sum(&self) -> usize { - return self.hash + self.hash + self.kv_hash + self.kv + self.kv_value_hash + self.kv_digest + self.kv_ref_value_hash - + self.kv_value_hash_feature_type; + + self.kv_value_hash_feature_type } } @@ -548,7 +546,7 @@ mod test { } // returns None after max - assert_eq!(chunks.next().is_none(), true); + assert!(chunks.next().is_none()); } #[test] @@ -582,8 +580,8 @@ mod test { assert_eq!(chunk_producer.len(), 5); // assert bounds - assert_eq!(chunk_producer.chunk_with_index(0).is_err(), true); - assert_eq!(chunk_producer.chunk_with_index(6).is_err(), true); + assert!(chunk_producer.chunk_with_index(0).is_err()); + assert!(chunk_producer.chunk_with_index(6).is_err()); // first chunk // expected: @@ -750,7 +748,7 @@ mod test { // generate multi chunk with no limit let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut chunk_result = chunk_producer + let chunk_result = chunk_producer .subtree_multi_chunk_with_limit(1, None) .expect("should generate chunk with limit"); @@ -798,7 +796,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, Some(2)); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 453); assert_eq!(chunk.len(), 13); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -818,7 +816,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, Some(3)); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 737); assert_eq!(chunk.len(), 17); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -838,7 +836,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, Some(4)); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 1021); 
assert_eq!(chunk.len(), 21); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -858,7 +856,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, Some(5)); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 1305); assert_eq!(chunk.len(), 25); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -878,7 +876,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, None); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 1589); assert_eq!(chunk.len(), 29); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -898,7 +896,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026)); assert_eq!(chunk_result.next_index, None); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 1589); assert_eq!(chunk.len(), 29); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 52e28ba0..cea9b2b6 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -44,7 +44,6 @@ pub mod source; use std::{ cell::Cell, - cmp::Ordering, collections::{BTreeMap, BTreeSet, LinkedList}, fmt, }; @@ -60,18 +59,14 @@ use source::MerkSource; use crate::{ error::Error, - merk::{ - defaults::{MAX_UPDATE_VALUE_BASED_ON_COSTS_TIMES, ROOT_KEY_KEY}, - options::MerkOptions, - }, + merk::{defaults::ROOT_KEY_KEY, options::MerkOptions}, proofs::{ chunk::{ chunk::{LEFT, RIGHT}, util::traversal_instruction_as_string, }, - encode_into, query::query_item::QueryItem, - Op as ProofOp, Query, + Query, }, tree::{ kv::ValueDefinedCostType, AuxMerkBatch, CryptoHash, Op, RefWalker, TreeNode, NULL_HASH, @@ -290,10 +285,7 @@ where /// Returns the height of the Merk tree pub fn height(&self) -> Option { - self.use_tree(|tree| match tree { - None => None, - Some(tree) => Some(tree.height()), - }) + self.use_tree(|tree| tree.map(|tree| tree.height())) } /// Returns the root non-prefixed key of the tree. If the tree is empty, @@ -578,7 +570,7 @@ where ); self.tree.set(tree); - return (bad_link_map, parent_keys); + (bad_link_map, parent_keys) } fn verify_tree( @@ -625,25 +617,21 @@ where Link::Reference { hash, key, sum, .. } => { (hash.to_owned(), key.to_owned(), sum.to_owned()) } - Link::Modified { - tree, - child_heights, - .. - } => ( + Link::Modified { tree, .. 
} => ( tree.hash().unwrap(), tree.key().to_vec(), tree.sum().unwrap(), ), Link::Loaded { hash, - child_heights, + child_heights: _, sum, tree, } => (hash.to_owned(), tree.key().to_vec(), sum.to_owned()), _ => todo!(), }; - let instruction_id = traversal_instruction_as_string(&traversal_instruction); + let instruction_id = traversal_instruction_as_string(traversal_instruction); let node = TreeNode::get( &self.storage, key, @@ -652,27 +640,27 @@ where .unwrap(); if node.is_err() { - bad_link_map.insert(instruction_id.clone(), hash.clone()); + bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } let node = node.unwrap(); if node.is_none() { - bad_link_map.insert(instruction_id.clone(), hash.clone()); + bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } let node = node.unwrap(); if &node.hash().unwrap() != &hash { - bad_link_map.insert(instruction_id.clone(), hash.clone()); + bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } if node.sum().unwrap() != sum { - bad_link_map.insert(instruction_id.clone(), hash.clone()); + bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } @@ -702,7 +690,7 @@ fn fetch_node<'db>( #[cfg(test)] mod test { - use grovedb_costs::OperationCost; + use grovedb_path::SubtreePath; use grovedb_storage::{ rocksdb_storage::{PrefixedRocksDbStorageContext, RocksDbStorage}, diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index a4c747d1..98e70672 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -41,18 +41,15 @@ use crate::{ chunk::{LEFT, RIGHT}, chunk_op::ChunkOp, error::{ChunkError, ChunkError::InternalError}, - util::{ - string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, - }, + util::{string_as_traversal_instruction, traversal_instruction_as_string}, }, tree::{execute, Child, Tree as ProofTree}, Node, Op, }, tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, CryptoHash, Error, - Error::{CostsError, EdError, StorageError}, + Error::{CostsError, StorageError}, Link, Merk, - TreeFeatureType::{BasicMerkNode, SummedMerkNode}, }; /// Restorer handles verification of chunks and replication of Merk trees. @@ -203,7 +200,7 @@ impl<'db, S: StorageContext<'db>> Restorer { let mut tree = TreeNode::new_with_value_hash( key.clone(), value.clone(), - value_hash.clone(), + *value_hash, *feature_type, ) .unwrap(); @@ -222,8 +219,7 @@ impl<'db, S: StorageContext<'db>> Restorer { // we get the chunk id and add the hash to restorer state let chunk_id = traversal_instruction_as_string(node_traversal_instruction); new_chunk_ids.push(chunk_id.clone()); - self.chunk_id_to_root_hash - .insert(chunk_id.clone(), hash.clone()); + self.chunk_id_to_root_hash.insert(chunk_id.clone(), *hash); // TODO: handle unwrap self.parent_keys .insert(chunk_id, parent_key.unwrap().to_owned()); @@ -282,7 +278,7 @@ impl<'db, S: StorageContext<'db>> Restorer { let updated_key = chunk_tree.key(); let updated_sum = chunk_tree.sum(); - if let Some(Link::Reference { key, sum, .. }) = parent.link_mut(is_left.clone()) { + if let Some(Link::Reference { key, sum, .. 
}) = parent.link_mut(*is_left) { *key = updated_key.to_vec(); *sum = updated_sum; } @@ -343,13 +339,13 @@ impl<'db, S: StorageContext<'db>> Restorer { .put(walker.tree().key(), &bytes, None, None) .map_err(CostsError)?; - return Ok((left_height, right_height)); + Ok((left_height, right_height)) } let mut batch = self.merk.storage.new_batch(); // TODO: deal with unwrap let mut tree = self.merk.tree.take().unwrap(); - let mut walker = RefWalker::new(&mut tree, self.merk.source()); + let walker = RefWalker::new(&mut tree, self.merk.source()); rewrite_child_heights(walker, &mut batch)?; @@ -379,7 +375,7 @@ impl<'db, S: StorageContext<'db>> Restorer { /// processing all chunks. pub fn finalize(mut self) -> Result, Error> { // ensure all chunks have been processed - if self.chunk_id_to_root_hash.len() != 0 || self.parent_keys.len() != 0 { + if !self.chunk_id_to_root_hash.is_empty() || !self.parent_keys.is_empty() { return Err(Error::ChunkRestoringError( ChunkError::RestorationNotComplete, )); @@ -397,7 +393,7 @@ impl<'db, S: StorageContext<'db>> Restorer { .load_base_root(None::<&fn(&[u8]) -> Option>); } - if self.merk.verify().0.len() != 0 { + if !self.merk.verify().0.is_empty() { return Err(Error::ChunkRestoringError(ChunkError::InternalError( "restored tree invalid", ))); @@ -411,7 +407,7 @@ impl<'db, S: StorageContext<'db>> Restorer { fn verify_height(&self) -> Result<(), Error> { let tree = self.merk.tree.take(); let height_verification_result = if let Some(tree) = &tree { - self.verify_tree_height(&tree, tree.height()) + self.verify_tree_height(tree, tree.height()) } else { Ok(()) }; @@ -450,7 +446,7 @@ impl<'db, S: StorageContext<'db>> Restorer { if left_tree.is_none() { let left_tree = TreeNode::get( &self.merk.storage, - link.key().to_vec(), + link.key(), None::<&fn(&[u8]) -> Option>, ) .unwrap()? @@ -466,7 +462,7 @@ impl<'db, S: StorageContext<'db>> Restorer { if right_tree.is_none() { let right_tree = TreeNode::get( &self.merk.storage, - link.key().to_vec(), + link.key(), None::<&fn(&[u8]) -> Option>, ) .unwrap()? 
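The `verify_tree_height` checks touched in the hunk above all reduce to one invariant: a leaf has height 1, sibling child heights may differ by at most 1, and a parent is exactly one taller than its tallest child. A minimal standalone sketch of that rule, using a toy `ToyNode` type rather than the crate's `TreeNode`:

    struct ToyNode {
        left: Option<Box<ToyNode>>,
        right: Option<Box<ToyNode>>,
    }

    /// Returns Some(height) if every node satisfies the AVL height rules,
    /// or None at the first violation (mirroring the CorruptedState errors).
    fn checked_height(node: &ToyNode) -> Option<u8> {
        // an absent child contributes height 0
        let left = node.left.as_deref().map_or(Some(0), checked_height)?;
        let right = node.right.as_deref().map_or(Some(0), checked_height)?;
        // child heights may differ by at most 1
        if left.abs_diff(right) > 1 {
            return None;
        }
        // a parent is exactly 1 taller than its tallest child
        Some(left.max(right) + 1)
    }
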
@@ -494,18 +490,13 @@ mod tests { use super::*; use crate::{ - execute_proof, merk::chunks::ChunkProducer, - proofs::{ - chunk::{ - chunk::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, - error::ChunkError::InvalidChunkProof, - }, - Query, + proofs::chunk::{ + chunk::tests::traverse_get_node_hash, error::ChunkError::InvalidChunkProof, }, test_utils::{make_batch_seq, TempMerk}, Error::ChunkRestoringError, - KVIterator, Merk, PanicSource, + Merk, PanicSource, }; #[test] @@ -604,7 +595,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -682,7 +673,7 @@ mod tests { // apply second chunk let chunk_process_result = restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk); - assert_eq!(chunk_process_result.is_err(), true); + assert!(chunk_process_result.is_err()); assert!(matches!( chunk_process_result, Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk)) @@ -693,7 +684,7 @@ mod tests { let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); let chunk_process_result = restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk); - assert_eq!(chunk_process_result.is_err(), true); + assert!(chunk_process_result.is_err()); assert!(matches!( chunk_process_result, Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( @@ -735,7 +726,7 @@ mod tests { assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None); // finalize merk - let mut restored_merk = restorer.finalize().expect("should finalized successfully"); + let restored_merk = restorer.finalize().expect("should finalized successfully"); assert_eq!( restored_merk.root_hash().unwrap(), @@ -802,7 +793,7 @@ mod tests { // build the restoration merk let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -873,7 +864,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -938,7 +929,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1010,7 +1001,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1096,7 +1087,7 @@ mod tests { // build the restoration merk let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1124,7 +1115,7 @@ mod tests { let mut chunk_id_opt = Some("".to_string()); while let Some(chunk_id) = chunk_id_opt { let multi_chunk = chunk_producer - .multi_chunk_with_limit(chunk_id.as_str(), limit.clone()) + .multi_chunk_with_limit(chunk_id.as_str(), limit) .expect("should get chunk"); restorer 
.process_multi_chunk(multi_chunk.chunk) @@ -1176,7 +1167,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1221,7 +1212,7 @@ mod tests { // drop the restorer and the restoration merk drop(restorer); // open the restoration merk again and build a restorer from it - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1238,7 +1229,7 @@ mod tests { // recover state let recovery_attempt = restorer.attempt_state_recovery(); - assert_eq!(recovery_attempt.is_ok(), true); + assert!(recovery_attempt.is_ok()); assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); assert_eq!(restorer.parent_keys.len(), 4); diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 2ea820ef..22334688 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -28,29 +28,6 @@ //! Chunk proofs -#[cfg(feature = "full")] -use grovedb_costs::{ - cost_return_on_error, cost_return_on_error_no_add, CostResult, CostsExt, OperationCost, -}; -#[cfg(feature = "full")] -use grovedb_storage::RawIterator; -#[cfg(feature = "full")] -use { - super::tree::{execute, Tree as ProofTree}, - crate::tree::CryptoHash, - crate::tree::TreeNode, -}; - -#[cfg(feature = "full")] -use super::{Node, Op}; -#[cfg(feature = "full")] -use crate::{ - error::Error, - tree::{Fetch, RefWalker}, - Error::EdError, - TreeFeatureType::BasicMerkNode, -}; - mod binary_range; #[cfg(feature = "full")] pub mod chunk; diff --git a/merk/src/proofs/chunk/binary_range.rs b/merk/src/proofs/chunk/binary_range.rs index 01a20531..2acaa728 100644 --- a/merk/src/proofs/chunk/binary_range.rs +++ b/merk/src/proofs/chunk/binary_range.rs @@ -55,7 +55,7 @@ impl BinaryRange { )); } - return Ok(Self { start, end }); + Ok(Self { start, end }) } /// Returns the len of the current range @@ -103,7 +103,7 @@ impl BinaryRange { let half_size = self.len() / 2; let second_half_start = self.start + half_size; - return Ok(if left { + Ok(if left { Self { start: self.start, end: second_half_start - 1, @@ -113,7 +113,7 @@ impl BinaryRange { start: second_half_start, end: self.end, } - }); + }) } /// Returns a new range that increments the start value @@ -144,36 +144,36 @@ mod test { #[test] fn cannot_create_invalid_range() { let invalid_range = BinaryRange::new(5, 3); - assert_eq!(invalid_range.is_err(), true); + assert!(invalid_range.is_err()); } #[test] fn can_get_range_len() { let range = BinaryRange::new(2, 5).expect("should create range"); assert_eq!(range.len(), 4); - assert_eq!(range.odd(), false); + assert!(!range.odd()); let range = BinaryRange::new(2, 2).expect("should create range"); assert_eq!(range.len(), 1); - assert_eq!(range.odd(), true); + assert!(range.odd()); } #[test] fn can_determine_correct_half() { let range = BinaryRange::new(3, 7).expect("should create range"); assert_eq!(range.len(), 5); - assert_eq!(range.odd(), true); + assert!(range.odd()); // cannot determine half for value outside a range - assert_eq!(range.which_half(1).is_none(), true); - assert_eq!(range.which_half(7).is_none(), true); + assert!(range.which_half(1).is_none()); + assert!(range.which_half(7).is_none()); // cannot determine half when range is odd - assert_eq!(range.which_half(3).is_none(), true); + assert!(range.which_half(3).is_none()); let range = 
BinaryRange::new(3, 6).expect("should create range"); assert_eq!(range.len(), 4); - assert_eq!(range.odd(), false); + assert!(!range.odd()); assert_eq!(range.which_half(3), Some(LEFT)); assert_eq!(range.which_half(4), Some(LEFT)); @@ -207,19 +207,19 @@ mod test { // should not be allowed to advance the range anymore let advance_result = range.advance_range_start(); - assert_eq!(advance_result.is_err(), true); + assert!(advance_result.is_err()); } #[test] fn can_break_range_into_halves() { let range = BinaryRange::new(2, 10).expect("should create range"); assert_eq!(range.len(), 9); - assert_eq!(range.odd(), true); - assert_eq!(range.get_half(LEFT).is_err(), true); + assert!(range.odd()); + assert!(range.get_half(LEFT).is_err()); let range = BinaryRange::new(2, 11).expect("should create range"); assert_eq!(range.len(), 10); - assert_eq!(range.odd(), false); + assert!(!range.odd()); let left_range = range.get_half(LEFT).expect("should get sub range"); assert_eq!(left_range.start, 2); @@ -230,7 +230,7 @@ mod test { assert_eq!(right_range.end, 11); // right_range is false, advance to make even - let (right_range, prev) = right_range.advance_range_start().expect("should advance"); + let (right_range, _prev) = right_range.advance_range_start().expect("should advance"); let right_left_range = right_range.get_half(LEFT).expect("should get sub range"); assert_eq!(right_left_range.len(), 2); assert_eq!(right_left_range.start, 8); diff --git a/merk/src/proofs/chunk/chunk.rs b/merk/src/proofs/chunk/chunk.rs index 5258dffa..95d888ec 100644 --- a/merk/src/proofs/chunk/chunk.rs +++ b/merk/src/proofs/chunk/chunk.rs @@ -103,7 +103,7 @@ where depth: usize, ) -> Result, Error> { // base case - if instructions.len() == 0 { + if instructions.is_empty() { // we are at the desired node return self.create_chunk(depth); } @@ -213,11 +213,10 @@ pub mod tests { chunk::chunk::{verify_height_proof, LEFT, RIGHT}, tree::execute, Node, Op, - Op::Parent, }, - test_utils::{make_tree_seq, make_tree_seq_with_start_key}, + test_utils::make_tree_seq_with_start_key, tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, - CryptoHash, PanicSource, TreeFeatureType, + PanicSource, TreeFeatureType, }; fn build_tree_10_nodes() -> TreeNode { @@ -234,39 +233,39 @@ pub mod tests { /// Traverses a tree to a certain node and returns the node hash of that /// node pub fn traverse_get_node_hash( - mut walker: &mut RefWalker, + walker: &mut RefWalker, traverse_instructions: &[bool], ) -> Node { - return traverse_and_apply(walker, traverse_instructions, |walker| { + traverse_and_apply(walker, traverse_instructions, |walker| { walker.to_hash_node().unwrap() - }); + }) } /// Traverses a tree to a certain node and returns the kv_feature_type of /// that node pub fn traverse_get_kv_feature_type( - mut walker: &mut RefWalker, + walker: &mut RefWalker, traverse_instructions: &[bool], ) -> Node { - return traverse_and_apply(walker, traverse_instructions, |walker| { + traverse_and_apply(walker, traverse_instructions, |walker| { walker.to_kv_value_hash_feature_type_node() - }); + }) } /// Traverses a tree to a certain node and returns the kv_hash of /// that node pub fn traverse_get_kv_hash( - mut walker: &mut RefWalker, + walker: &mut RefWalker, traverse_instructions: &[bool], ) -> Node { - return traverse_and_apply(walker, traverse_instructions, |walker| { + traverse_and_apply(walker, traverse_instructions, |walker| { walker.to_kvhash_node() - }); + }) } /// Traverses a tree to a certain node and returns the result of applying /// some arbitrary 
function pub fn traverse_and_apply( - mut walker: &mut RefWalker, + walker: &mut RefWalker, traverse_instructions: &[bool], apply_fn: T, ) -> Node @@ -285,7 +284,7 @@ pub mod tests { .unwrap() .unwrap() .unwrap(); - return traverse_and_apply(&mut child, &traverse_instructions[1..], apply_fn); + traverse_and_apply(&mut child, &traverse_instructions[1..], apply_fn) } #[test] diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 9402d3d5..55f5751b 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -31,7 +31,7 @@ use std::io::{Read, Write}; use ed::{Decode, Encode}; use integer_encoding::{VarInt, VarIntReader}; -use crate::proofs::{chunk::chunk_op::ChunkOp::Chunk, Op}; +use crate::proofs::Op; /// Represents the chunk generated from a given starting chunk id #[derive(PartialEq, Debug)] diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs index 986b24c7..530e00e7 100644 --- a/merk/src/proofs/chunk/util.rs +++ b/merk/src/proofs/chunk/util.rs @@ -65,7 +65,7 @@ fn chunk_height_per_layer(height: usize) -> Vec { // reduce the three_count by 1 // so the remainder becomes 3 + 1 // which is equivalent to 2 + 2 - three_count = three_count - 1; + three_count -= 1; two_count += 2; } 2 => { @@ -100,8 +100,8 @@ pub fn chunk_layer(height: usize, chunk_id: usize) -> Result { // and remaining depth points to a chunk debug_assert!(remaining_depth > layer_heights[layer - 1]); - remaining_depth = remaining_depth - layer_heights[layer - 1]; - layer = layer + 1; + remaining_depth -= layer_heights[layer - 1]; + layer += 1; } Ok(layer - 1) @@ -162,7 +162,7 @@ fn number_of_chunks_internal(layer_heights: Vec) -> usize { chunk_counts_per_layer.push(current_layer_chunk_count); } - return chunk_counts_per_layer.into_iter().sum(); + chunk_counts_per_layer.into_iter().sum() } /// Calculates the maximum number of exit nodes for a tree of height h. @@ -193,7 +193,7 @@ pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result< // from the initial chunk (1) we have an even number of // exit nodes, and they have even numbers of exit nodes ... 
// so total_chunk_count = 1 + some_even_number = odd - debug_assert_eq!(chunk_range.odd(), true); + debug_assert!(chunk_range.odd()); // bisect and reduce the chunk range until we get to the desired chunk // we keep track of every left right decision we make @@ -223,7 +223,7 @@ pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result< // chunk range len is exactly 1 // this must be the desired chunk id // return instructions that got us here - return Ok(instructions); + Ok(instructions) } /// Determine the chunk id given the traversal instruction and the max height of @@ -296,12 +296,12 @@ pub fn chunk_id_from_traversal_instruction( if chunk_count % 2 != 0 { // remove the current chunk from the chunk count - chunk_count = chunk_count - 1; + chunk_count -= 1; } - chunk_count = chunk_count / exit_node_count(layer_height); + chunk_count /= exit_node_count(layer_height); - current_chunk_id = current_chunk_id + offset_multiplier as usize * chunk_count + 1; + current_chunk_id = current_chunk_id + offset_multiplier * chunk_count + 1; start_index = end_index; } @@ -324,7 +324,7 @@ pub fn chunk_id_from_traversal_instruction_with_recovery( height, ); } - return chunk_id_result; + chunk_id_result } /// Generate instruction for traversing to a given chunk in a binary tree, @@ -369,7 +369,6 @@ pub fn write_to_vec(dest: &mut W, value: &[u8]) -> Result<(), Error> { #[cfg(test)] mod test { - use byteorder::LE; use super::*; use crate::proofs::chunk::chunk::{LEFT, RIGHT}; @@ -534,8 +533,8 @@ mod test { assert_eq!(instruction, &[RIGHT, RIGHT]); // out of bound tests - assert_eq!(generate_traversal_instruction(4, 6).is_err(), true); - assert_eq!(generate_traversal_instruction(4, 0).is_err(), true); + assert!(generate_traversal_instruction(4, 6).is_err()); + assert!(generate_traversal_instruction(4, 0).is_err()); } #[test] @@ -586,7 +585,7 @@ mod test { string_as_traversal_instruction("001").unwrap(), vec![RIGHT, RIGHT, LEFT] ); - assert_eq!(string_as_traversal_instruction("002").is_err(), true); + assert!(string_as_traversal_instruction("002").is_err()); assert_eq!( string_as_traversal_instruction("").unwrap(), Vec::::new() @@ -675,10 +674,7 @@ mod test { // function with recovery we expect this to backtrack to the last chunk // boundary e.g. [left] should backtrack to [] // [left, left, right, left] should backtrack to [left, left, right] - assert_eq!( - chunk_id_from_traversal_instruction(&[LEFT], 5).is_err(), - true - ); + assert!(chunk_id_from_traversal_instruction(&[LEFT], 5).is_err()); assert_eq!( chunk_id_from_traversal_instruction_with_recovery(&[LEFT], 5).unwrap(), 1 diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index b91bd68f..b3db0d77 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -66,7 +66,7 @@ impl Child { Node::KV(key, _) | Node::KVValueHash(key, ..) 
=> (key.as_slice(), None), Node::KVValueHashFeatureType(key, _, _, feature_type) => { let sum_value = match feature_type { - SummedMerkNode(sum) => Some(sum.clone()), + SummedMerkNode(sum) => Some(*sum), _ => None, }; (key.as_slice(), sum_value) @@ -358,8 +358,7 @@ impl<'a> Iterator for LayerIter<'a> { type Item = &'a Tree; fn next(&mut self) -> Option { - while !self.stack.is_empty() { - let (item, item_depth) = self.stack.pop().expect("confirmed not None"); + while let Some((item, item_depth)) = self.stack.pop() { if item_depth != self.depth { if let Some(right_child) = item.child(false) { self.stack.push((&right_child.tree, item_depth + 1)) @@ -372,7 +371,7 @@ impl<'a> Iterator for LayerIter<'a> { } } - return None; + None } } @@ -665,7 +664,7 @@ mod test { assert_eq!( left_link, Link::Reference { - hash: tree.left.as_ref().map(|node| node.hash).clone().unwrap(), + hash: tree.left.as_ref().map(|node| node.hash).unwrap(), sum: None, child_heights: (0, 0), key: vec![1] @@ -675,7 +674,7 @@ mod test { assert_eq!( right_link, Link::Reference { - hash: tree.right.as_ref().map(|node| node.hash).clone().unwrap(), + hash: tree.right.as_ref().map(|node| node.hash).unwrap(), sum: None, child_heights: (0, 0), key: vec![3] @@ -714,7 +713,7 @@ mod test { assert_eq!( left_link, Link::Reference { - hash: tree.left.as_ref().map(|node| node.hash).clone().unwrap(), + hash: tree.left.as_ref().map(|node| node.hash).unwrap(), sum: Some(3), child_heights: (0, 0), key: vec![1] @@ -724,7 +723,7 @@ mod test { assert_eq!( right_link, Link::Reference { - hash: tree.right.as_ref().map(|node| node.hash).clone().unwrap(), + hash: tree.right.as_ref().map(|node| node.hash).unwrap(), sum: Some(1), child_heights: (0, 0), key: vec![3]