Skip to content

Commit

Permalink
[ENH] Add uint blockfile key/val (#1854)
Browse files Browse the repository at this point in the history
## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
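	 - Lets the blockfile use uint (`u32`) keys and values.
	 - Name cleanup / unification: `Value::Int32Value` and `ValueType::Int32` are renamed to `Value::IntValue` and `ValueType::Int`.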
 - New functionality
	 - /

## Test plan
*How are these changes tested?*
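Basic sanity tests: a block sizing test (`test_sizing_uint_key_val`) and a blockfile set/get round-trip test (`test_uint_key_val`) for uint keys and values.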
- [x] Tests pass locally with `pytest` for python, `yarn test` for js,
`cargo test` for rust

## Documentation Changes
None
  • Loading branch information
HammadB authored Mar 14, 2024
1 parent b31a07c commit 21e5fcc
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 21 deletions.
22 changes: 20 additions & 2 deletions rust/worker/src/blockstore/arrow_blockfile/block/delta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ impl BlockDeltaInner {
Value::Int32ArrayValue(arr) => acc + arr.len(),
Value::StringValue(s) => acc + s.len(),
Value::RoaringBitmapValue(bitmap) => acc + bitmap.serialized_size(),
Value::UintValue(_) => acc + 1,
_ => unimplemented!("Value type not implemented"),
})
}
Expand Down Expand Up @@ -238,14 +239,15 @@ impl BlockDeltaInner {
ValueType::Int32Array | ValueType::String | ValueType::RoaringBitmap => {
bit_util::round_upto_multiple_of_64((item_count + 1) * 4)
}
ValueType::Uint => 0,
_ => unimplemented!("Value type not implemented"),
}
}

/// Returns the number of bytes to reserve for the key column's offsets
/// when a block holds `item_count` items of the given `key_type`.
///
/// * `KeyType::String` reserves `(item_count + 1) * 4` bytes, rounded up to a
///   multiple of 64 (presumably one 4-byte offset per item plus a trailing
///   one, as for variable-length Arrow arrays — confirm against the builder).
/// * `KeyType::Float` and `KeyType::Uint` reserve nothing.
///
/// # Panics
///
/// Panics with `unimplemented!` for any other key type.
fn offset_size_for_key_type(&self, item_count: usize, key_type: KeyType) -> usize {
    match key_type {
        KeyType::String => bit_util::round_upto_multiple_of_64((item_count + 1) * 4),
        // Both key kinds are handled by one arm; a separate `Float` arm ahead
        // of this one would only shadow part of the or-pattern.
        KeyType::Float | KeyType::Uint => 0,
        _ => unimplemented!("Key type not implemented"),
    }
}
Expand Down Expand Up @@ -429,7 +431,23 @@ mod test {
let size = delta.get_size();
let block_data = BlockData::try_from(&delta).unwrap();
assert_eq!(size, block_data.get_size());
}

let (split_key, delta) = delta.split(&block_provider);
/// Checks that the size reported by a delta holding uint keys and uint values
/// equals the size of the `BlockData` built from that delta.
#[test]
fn test_sizing_uint_key_val() {
    let provider = ArrowBlockProvider::new();
    let source_block = provider.create_block(KeyType::Uint, ValueType::Uint);
    let delta = BlockDelta::from(source_block.clone());

    // Fill the delta with 2000 entries whose key and value are the same number.
    for id in 0..2000u32 {
        delta.add(
            BlockfileKey::new("prefix".to_string(), Key::Uint(id)),
            Value::UintValue(id),
        );
    }

    // Take the delta's own size first, then compare with the materialized block.
    let reported_size = delta.get_size();
    let materialized = BlockData::try_from(&delta).unwrap();
    assert_eq!(reported_size, materialized.get_size());
}
}
6 changes: 5 additions & 1 deletion rust/worker/src/blockstore/arrow_blockfile/block/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::types::Block;
use crate::blockstore::types::{BlockfileKey, Key, KeyType, Value, ValueType};
use arrow::array::{Array, BooleanArray, Int32Array, ListArray, StringArray};
use arrow::array::{Array, BooleanArray, Int32Array, ListArray, StringArray, UInt32Array};

/// An iterator over the contents of a block.
/// This is a simple wrapper around the Arrow array data that is stored in the block.
Expand Down Expand Up @@ -77,6 +77,10 @@ impl Iterator for BlockIterator {
Some(key) => Key::Bool(key.value(self.index)),
None => return None,
},
KeyType::Uint => match key.as_any().downcast_ref::<UInt32Array>() {
Some(key) => Key::Uint(key.value(self.index) as u32),
None => return None,
},
};

let value = match self.value_type {
Expand Down
46 changes: 45 additions & 1 deletion rust/worker/src/blockstore/arrow_blockfile/block/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::blockstore::types::{BlockfileKey, Key, KeyType, Value, ValueType};
use crate::errors::{ChromaError, ErrorCodes};
use arrow::array::{
BinaryArray, BinaryBuilder, BooleanArray, BooleanBuilder, Float32Array, Float32Builder,
GenericByteBuilder,
GenericByteBuilder, UInt32Array, UInt32Builder,
};
use arrow::{
array::{Array, Int32Array, Int32Builder, ListArray, ListBuilder, StringArray, StringBuilder},
Expand Down Expand Up @@ -125,6 +125,11 @@ impl Block {
.unwrap()
.value(i)
}
Key::Uint(inner_key) => {
*inner_key
== key.as_any().downcast_ref::<UInt32Array>().unwrap().value(i)
as u32
}
};
if key_matches {
match self.get_value_type() {
Expand Down Expand Up @@ -166,6 +171,15 @@ impl Block {
Err(_) => return None,
}
}
ValueType::Uint => {
return Some(Value::UintValue(
value
.as_any()
.downcast_ref::<UInt32Array>()
.unwrap()
.value(i),
))
}
// TODO: Add support for other types
_ => unimplemented!(),
}
Expand Down Expand Up @@ -285,12 +299,14 @@ enum KeyBuilder {
StringBuilder(StringBuilder),
FloatBuilder(Float32Builder),
BoolBuilder(BooleanBuilder),
UintBuilder(UInt32Builder),
}

/// Arrow array builders for the value column of a block, one variant per
/// value representation that `BlockDataBuilder` can append.
enum ValueBuilder {
/// Builder for list-of-`Int32` values (`ListBuilder<Int32Builder>`).
Int32ArrayValueBuilder(ListBuilder<Int32Builder>),
/// Builder backed by a `StringBuilder`; presumably used for
/// `Value::StringValue` — confirm against `BlockDataBuilder::add`.
StringValueBuilder(StringBuilder),
/// Builder for `Value::RoaringBitmapValue`; the column is finished as a
/// `DataType::Binary` field.
RoaringBitmapBuilder(BinaryBuilder),
/// Builder for `Value::UintValue`; the column is finished as a
/// `DataType::UInt32` field.
UintValueBuilder(UInt32Builder),
}

/// BlockDataBuilder is used to build a block. It is used to add data to a block and then build the BlockData once all data has been added.
Expand Down Expand Up @@ -367,6 +383,9 @@ impl BlockDataBuilder {
KeyType::Bool => {
KeyBuilder::BoolBuilder(BooleanBuilder::with_capacity(options.item_count))
}
KeyType::Uint => {
KeyBuilder::UintBuilder(UInt32Builder::with_capacity(options.item_count))
}
};
let value_builder = match value_type {
ValueType::Int32Array => {
Expand All @@ -379,6 +398,9 @@ impl BlockDataBuilder {
options.item_count,
options.total_value_capacity,
)),
ValueType::Uint => {
ValueBuilder::UintValueBuilder(UInt32Builder::with_capacity(options.item_count))
}
ValueType::RoaringBitmap => ValueBuilder::RoaringBitmapBuilder(
BinaryBuilder::with_capacity(options.item_count, options.total_value_capacity),
),
Expand Down Expand Up @@ -428,6 +450,12 @@ impl BlockDataBuilder {
}
_ => unreachable!("Invalid key type for block"),
},
KeyBuilder::UintBuilder(ref mut builder) => match key.key {
Key::Uint(key) => {
builder.append_value(key);
}
_ => unreachable!("Invalid key type for block"),
},
}

match self.value_builder {
Expand All @@ -443,6 +471,12 @@ impl BlockDataBuilder {
}
_ => unreachable!("Invalid value type for block"),
},
ValueBuilder::UintValueBuilder(ref mut builder) => match value {
Value::UintValue(uint) => {
builder.append_value(uint);
}
_ => unreachable!("Invalid value type for block"),
},
ValueBuilder::RoaringBitmapBuilder(ref mut builder) => match value {
Value::RoaringBitmapValue(bitmap) => {
let mut bytes = Vec::with_capacity(bitmap.serialized_size());
Expand Down Expand Up @@ -481,6 +515,11 @@ impl BlockDataBuilder {
let arr = builder.finish();
(&arr as &dyn Array).slice(0, arr.len())
}
KeyBuilder::UintBuilder(ref mut builder) => {
key_field = Field::new("key", DataType::UInt32, true);
let arr = builder.finish();
(&arr as &dyn Array).slice(0, arr.len())
}
};

let value_field;
Expand All @@ -499,6 +538,11 @@ impl BlockDataBuilder {
let arr = builder.finish();
(&arr as &dyn Array).slice(0, arr.len())
}
ValueBuilder::UintValueBuilder(ref mut builder) => {
value_field = Field::new("value", DataType::UInt32, true);
let arr = builder.finish();
(&arr as &dyn Array).slice(0, arr.len())
}
ValueBuilder::RoaringBitmapBuilder(ref mut builder) => {
value_field = Field::new("value", DataType::Binary, true);
let arr = builder.finish();
Expand Down
39 changes: 37 additions & 2 deletions rust/worker/src/blockstore/arrow_blockfile/blockfile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ impl Blockfile for ArrowBlockfile {
return Err(Box::new(BlockfileError::InvalidKeyType));
}
}
Key::Uint(_) => {
if self.key_type != KeyType::Uint {
return Err(Box::new(BlockfileError::InvalidKeyType));
}
}
}

// Validate value type
Expand All @@ -175,8 +180,13 @@ impl Blockfile for ArrowBlockfile {
return Err(Box::new(BlockfileError::InvalidValueType));
}
}
Value::Int32Value(_) => {
if self.value_type != ValueType::Int32 {
Value::IntValue(_) => {
if self.value_type != ValueType::Int {
return Err(Box::new(BlockfileError::InvalidValueType));
}
}
Value::UintValue(_) => {
if self.value_type != ValueType::Uint {
return Err(Box::new(BlockfileError::InvalidValueType));
}
}
Expand Down Expand Up @@ -570,4 +580,29 @@ mod tests {
}
}
}

/// Round-trips 2000 uint key/value pairs through an `ArrowBlockfile`:
/// writes them in one transaction, then reads each key back and checks
/// that the stored value equals the key's number.
#[test]
fn test_uint_key_val() {
    let provider = ArrowBlockProvider::new();
    let mut blockfile = ArrowBlockfile::new(KeyType::Uint, ValueType::Uint, provider);

    // Every entry shares the same prefix and differs only in its uint key.
    let make_key = |id: u32| BlockfileKey::new("key".to_string(), Key::Uint(id));
    let ids = 0..2000u32;

    blockfile.begin_transaction().unwrap();
    for id in ids.clone() {
        blockfile.set(make_key(id), Value::UintValue(id)).unwrap();
    }
    blockfile.commit_transaction().unwrap();

    for id in ids {
        let fetched = blockfile.get(make_key(id)).unwrap();
        if let Value::UintValue(stored) = fetched {
            assert_eq!(stored, id);
        } else {
            panic!("Unexpected value type");
        }
    }
}
}
29 changes: 21 additions & 8 deletions rust/worker/src/blockstore/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ impl Key {
Key::String(s) => s.len(),
Key::Float(_) => 4,
Key::Bool(_) => 1,
Key::Uint(_) => 4,
}
}
}
Expand All @@ -69,6 +70,7 @@ impl From<&BlockfileKey> for KeyType {
Key::String(_) => KeyType::String,
Key::Float(_) => KeyType::Float,
Key::Bool(_) => KeyType::Bool,
Key::Uint(_) => KeyType::Uint,
}
}
}
Expand All @@ -78,13 +80,15 @@ pub(crate) enum Key {
String(String),
Float(f32),
Bool(bool),
Uint(u32),
}

/// The kind of key stored in a blockfile, without the key's value.
/// Each variant corresponds to the `Key` variant of the same name
/// (see `impl From<&BlockfileKey> for KeyType`).
#[derive(Debug, Clone, Copy, PartialEq)]
pub(crate) enum KeyType {
/// Kind of `Key::String` keys.
String,
/// Kind of `Key::Float` (`f32`) keys.
Float,
/// Kind of `Key::Bool` keys.
Bool,
/// Kind of `Key::Uint` (`u32`) keys.
Uint,
}

impl Display for Key {
Expand All @@ -93,6 +97,7 @@ impl Display for Key {
Key::String(s) => write!(f, "{}", s),
Key::Float(fl) => write!(f, "{}", fl),
Key::Bool(b) => write!(f, "{}", b),
Key::Uint(u) => write!(f, "{}", u),
}
}
}
Expand Down Expand Up @@ -146,15 +151,19 @@ impl Ord for BlockfileKey {
match self.key {
Key::String(ref s1) => match &other.key {
Key::String(s2) => s1.cmp(s2),
_ => panic!("Cannot compare string to float or bool"),
_ => panic!("Cannot compare string to float, bool, or uint"),
},
Key::Float(f1) => match &other.key {
Key::Float(f2) => f1.partial_cmp(f2).unwrap(),
_ => panic!("Cannot compare float to string or bool"),
_ => panic!("Cannot compare float to string, bool, or uint"),
},
Key::Bool(b1) => match &other.key {
Key::Bool(b2) => b1.cmp(b2),
_ => panic!("Cannot compare bool to string or float"),
_ => panic!("Cannot compare bool to string, float, or uint"),
},
Key::Uint(u1) => match &other.key {
Key::Uint(u2) => u1.cmp(u2),
_ => panic!("Cannot compare uint to string, float, or bool"),
},
}
} else {
Expand All @@ -170,7 +179,8 @@ pub(crate) enum Value {
Int32ArrayValue(Int32Array),
PositionalPostingListValue(PositionalPostingList),
StringValue(String),
Int32Value(i32),
IntValue(i32),
UintValue(u32),
RoaringBitmapValue(RoaringBitmap),
}

Expand Down Expand Up @@ -199,7 +209,8 @@ impl Clone for Value {
}
Value::StringValue(s) => Value::StringValue(s.clone()),
Value::RoaringBitmapValue(bitmap) => Value::RoaringBitmapValue(bitmap.clone()),
Value::Int32Value(i) => Value::Int32Value(*i),
Value::IntValue(i) => Value::IntValue(*i),
Value::UintValue(u) => Value::UintValue(*u),
}
}
}
Expand All @@ -213,7 +224,7 @@ impl Value {
}
Value::StringValue(s) => s.len(),
Value::RoaringBitmapValue(bitmap) => bitmap.serialized_size(),
Value::Int32Value(_) => 4,
Value::IntValue(_) | Value::UintValue(_) => 4,
}
}
}
Expand All @@ -225,7 +236,8 @@ impl From<&Value> for ValueType {
Value::PositionalPostingListValue(_) => ValueType::PositionalPostingList,
Value::RoaringBitmapValue(_) => ValueType::RoaringBitmap,
Value::StringValue(_) => ValueType::String,
Value::Int32Value(_) => ValueType::Int32,
Value::IntValue(_) => ValueType::Int,
Value::UintValue(_) => ValueType::Uint,
}
}
}
Expand All @@ -236,7 +248,8 @@ pub(crate) enum ValueType {
PositionalPostingList,
RoaringBitmap,
String,
Int32,
Int,
Uint,
}

pub(crate) trait Blockfile: BlockfileClone {
Expand Down
Loading

0 comments on commit 21e5fcc

Please sign in to comment.