diff --git a/rust/blockstore/src/arrow/block/types.rs b/rust/blockstore/src/arrow/block/types.rs index 34a2f1ec453..6764591c86f 100644 --- a/rust/blockstore/src/arrow/block/types.rs +++ b/rust/blockstore/src/arrow/block/types.rs @@ -346,26 +346,6 @@ impl Block { } } - pub fn get_all_data<'me, K: ArrowReadableKey<'me>, V: ArrowReadableValue<'me>>( - &'me self, - ) -> Vec<(&'me str, K, V)> { - let prefix_arr = self - .data - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut result = Vec::new(); - for i in 0..self.data.num_rows() { - result.push(( - prefix_arr.value(i), - K::get(self.data.column(1), i), - V::get(self.data.column(2), i), - )); - } - result - } - /// Get all the values for a given prefix & key range in the block /// ### Panics /// - If the underlying data types are not the same as the types specified in the function signature diff --git a/rust/blockstore/src/arrow/blockfile.rs b/rust/blockstore/src/arrow/blockfile.rs index c60a1e5eb7d..762d62a6fde 100644 --- a/rust/blockstore/src/arrow/blockfile.rs +++ b/rust/blockstore/src/arrow/blockfile.rs @@ -682,25 +682,6 @@ impl<'me, K: ArrowReadableKey<'me> + Into, V: ArrowReadableValue<'me true } - - pub async fn get_all_data(&'me self) -> Vec<(&'me str, K, V)> { - let block_ids = self.root.sparse_index.get_all_block_ids(); - let mut result = vec![]; - for block_id in block_ids { - let block = match self.get_block(block_id).await { - Ok(Some(block)) => block, - Ok(None) => { - continue; - } - Err(_) => { - continue; - } - }; - - result.extend(block.get_all_data()); - } - result - } } #[cfg(test)] diff --git a/rust/blockstore/src/arrow/sparse_index.rs b/rust/blockstore/src/arrow/sparse_index.rs index 3a7cac2c9ed..3d4124ab447 100644 --- a/rust/blockstore/src/arrow/sparse_index.rs +++ b/rust/blockstore/src/arrow/sparse_index.rs @@ -321,10 +321,6 @@ impl SparseIndexReader { get_target_block(search_key, forward).id } - pub(super) fn get_all_block_ids(&self) -> Vec { - self.data.forward.values().map(|v| v.id).collect() - } - /// Get all the block ids that contain keys in the given input search keys pub(super) fn get_all_target_block_ids(&self, mut search_keys: Vec) -> Vec { // Sort so that we can search in one iteration. diff --git a/rust/blockstore/src/types/reader.rs b/rust/blockstore/src/types/reader.rs index 7b8e70c2bf0..43311d979d4 100644 --- a/rust/blockstore/src/types/reader.rs +++ b/rust/blockstore/src/types/reader.rs @@ -131,11 +131,4 @@ impl< } } } - - pub async fn get_all_data(&'referred_data self) -> Vec<(&'referred_data str, K, V)> { - match self { - BlockfileReader::MemoryBlockfileReader(_) => todo!(), - BlockfileReader::ArrowBlockfileReader(reader) => reader.get_all_data().await, - } - } } diff --git a/rust/index/src/spann/types.rs b/rust/index/src/spann/types.rs index ed5a529e274..c78c64c36c0 100644 --- a/rust/index/src/spann/types.rs +++ b/rust/index/src/spann/types.rs @@ -33,6 +33,8 @@ pub enum SpannIndexWriterConstructionError { BlockfileReaderConstructionError, #[error("Blockfile writer construction error")] BlockfileWriterConstructionError, + #[error("Error loading version data from blockfile")] + BlockfileVersionDataLoadError, } impl ChromaError for SpannIndexWriterConstructionError { @@ -41,6 +43,7 @@ impl ChromaError for SpannIndexWriterConstructionError { Self::HnswIndexConstructionError => ErrorCodes::Internal, Self::BlockfileReaderConstructionError => ErrorCodes::Internal, Self::BlockfileWriterConstructionError => ErrorCodes::Internal, + Self::BlockfileVersionDataLoadError => ErrorCodes::Internal, } } } @@ -114,8 +117,11 @@ impl SpannIndexWriter { } }; // Load data using the reader. - let versions_data = reader.get_all_data().await; - versions_data.iter().for_each(|(_, key, value)| { + let versions_data = reader + .get_range(.., ..) + .await + .map_err(|_| SpannIndexWriterConstructionError::BlockfileVersionDataLoadError)?; + versions_data.iter().for_each(|(key, value)| { versions_map.insert(*key, *value); }); Ok(versions_map)