Skip to content

Commit

Permalink
Add feature for SSTable
Browse files Browse the repository at this point in the history
  • Loading branch information
summerxwu committed Aug 21, 2023
1 parent 6ca3470 commit fdcde61
Show file tree
Hide file tree
Showing 12 changed files with 160 additions and 57 deletions.
8 changes: 7 additions & 1 deletion src/blocks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,15 @@ impl Blocks {
num_of_elements,
}
}
/// Intended to return the largest key stored in this block.
///
/// NOTE(review): this is still a stub — the body is `todo!()`, so every call
/// panics. `SSTableBuilder::build` calls it to fill the `largest_key` field of
/// each index record.
pub fn largest_key(&self) -> &[u8]{
todo!()
}
/// Intended to return the smallest key stored in this block.
///
/// NOTE(review): this is still a stub — the body is `todo!()`, so every call
/// panics.
pub fn smallest_key(&self) -> &[u8]{
todo!()
}
}

mod block_builder;
pub mod record_iterator;
pub mod iterator;
#[cfg(test)]
mod tests;
9 changes: 8 additions & 1 deletion src/blocks/block_builder.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::thread::sleep;
use crate::blocks::{Blocks, SIZE_U16};
use bytes::BufMut;

Expand Down Expand Up @@ -37,7 +38,7 @@ impl BlockBuilder {
}
}
/// return the length of bytes sequence after encoding the origin one
fn evaluate_record_encoded_length(key: &[u8], value: &[u8]) -> usize {
pub fn evaluate_record_encoded_length(key: &[u8], value: &[u8]) -> usize {
SIZE_U16 + key.len() + SIZE_U16 + value.len()
}

Expand Down Expand Up @@ -84,4 +85,10 @@ impl BlockBuilder {
num_of_elements: self.offsets.len(),
}
}

/// Resets the builder to its empty state so it can be reused for another block.
pub fn clean_up(&mut self) {
    // The three resets do not depend on each other.
    self.amount = 0;
    self.offsets.clear();
    self.data.clear();
}
}
26 changes: 15 additions & 11 deletions src/blocks/record_iterator.rs → src/blocks/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,18 @@
use crate::blocks::{Blocks, SIZE_U16};
use crate::util::Iterator;
use crate::iterator::Iterator;
use bytes::Buf;
use std::cmp::Ordering;

/// RecordIterator yields the records in related blocks if the
/// iterator it self is valid after invoking next()

pub struct RecordIterator<'a> {
pub struct BlockRecordIterator<'a> {
block: &'a Blocks,
is_valid: bool,
current_index: usize,
}
impl<'a> RecordIterator<'a> {
pub fn new(block: &'a Blocks) -> Self {
RecordIterator {
block,
is_valid: false,
current_index: 0,
}
}
impl<'a> BlockRecordIterator<'a> {

fn key_at_index(&self, index: usize) -> Result<&'a [u8], String> {
if index >= self.block.num_of_elements {
return Err(format!("given index out of range of block`:"));
Expand All @@ -32,7 +26,17 @@ impl<'a> RecordIterator<'a> {
}
}

impl<'a> Iterator for RecordIterator<'a> {
impl<'a> Iterator for BlockRecordIterator<'a> {
type Item = &'a Blocks;

fn new(arg: Self::Item) -> Self {
BlockRecordIterator {
block: arg,
is_valid: false,
current_index: 0,
}
}

fn seek_to_first(&mut self) {
if self.block.offsets.len() == 0 {
self.is_valid = false;
Expand Down
14 changes: 7 additions & 7 deletions src/blocks/tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::blocks::block_builder::BlockBuilder;
use crate::blocks::record_iterator::RecordIterator;
use crate::blocks::iterator::BlockRecordIterator;
use crate::blocks::Blocks;
use crate::util::Iterator;
use crate::iterator::Iterator;

fn create_block_with_rec_num(size: u8) -> Blocks {
let mut builder = BlockBuilder::new();
Expand All @@ -27,35 +27,35 @@ fn test_build_block() {
/// Constructing an iterator over a one-record block must not panic.
#[test]
fn test_iterator_create() {
    let block = create_block_with_rec_num(1);
    // Only construction is exercised, so the binding is intentionally unused.
    let _iter = BlockRecordIterator::new(&block);
}

/// `seek_to_first` positions the iterator on the first record of the block.
#[test]
fn test_iterator_seek_to_first() {
    let block = create_block_with_rec_num(10);
    let mut iter = BlockRecordIterator::new(&block);
    iter.seek_to_first();
    assert_eq!("key_1".as_bytes(), iter.key());
}
/// `seek_to_last` positions the iterator on the last record of the block.
#[test]
fn test_iterator_seek_to_last() {
    let block = create_block_with_rec_num(10);
    let mut iter = BlockRecordIterator::new(&block);
    iter.seek_to_last();
    assert_eq!("key_10".as_bytes(), iter.key());
}

/// `seek_to_key` positions the iterator on the record with the requested key.
#[test]
fn test_iterator_seek_to_key() {
    let block = create_block_with_rec_num(10);
    let mut iter = BlockRecordIterator::new(&block);
    iter.seek_to_key("key_7".as_ref());
    assert_eq!("key_7".as_bytes(), iter.key());
}
#[test]
fn test_iterator_next() {
let block = create_block_with_rec_num(10);
let mut iter = RecordIterator::new(&block);
let mut iter = BlockRecordIterator::new(&block);
iter.seek_to_key("key_5".as_ref());
assert!(iter.is_valid());
assert_eq!("key_5".as_bytes(),iter.key());
Expand Down
12 changes: 12 additions & 0 deletions src/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/// Seekable cursor over key/value records.
///
/// NOTE: this trait shares its name with `std::iter::Iterator` but is a
/// different, crate-local trait; import it explicitly as
/// `crate::iterator::Iterator`.
pub trait Iterator {
/// The source an iterator is constructed from (the implementations in this
/// crate use `&Blocks` and `&SSTable`).
type Item;
/// Creates an iterator over `arg`. The implementations in this crate start
/// out not valid; position the iterator with one of the `seek_*` methods.
fn new(arg: Self::Item) -> Self;
/// Positions the iterator on the first record.
fn seek_to_first(&mut self);
/// Positions the iterator on the last record.
fn seek_to_last(&mut self);
/// Positions the iterator according to `key`.
/// NOTE(review): the returned `bool` presumably reports whether the seek
/// succeeded — confirm against the implementations.
fn seek_to_key(&mut self, key: &[u8]) -> bool;
/// Reports whether the iterator is currently valid; callers check this
/// before reading `key()`/`value()`.
fn is_valid(&self) -> bool;
/// Advances to the following record.
fn next(&mut self);
/// NOTE(review): presumably steps back to the preceding record — confirm.
fn prev(&mut self);
/// Returns the key of the current record.
fn key(&self) -> &[u8];
/// Returns the value of the current record.
fn value(&self) -> &[u8];
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
mod blocks;
mod sstable;
mod util;

mod iterator;
/// Returns the sum of `left` and `right`.
pub fn add(left: usize, right: usize) -> usize {
    // Same operation as `left + right`, spelled through the operator trait.
    std::ops::Add::add(left, right)
}
Expand Down
11 changes: 6 additions & 5 deletions src/sstable.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use crate::blocks::record_iterator::RecordIterator;
use crate::blocks::iterator::BlockRecordIterator;
use crate::blocks::{Blocks, SIZE_U16};
use crate::util::{env, Iterator};
use crate::util::env;
use anyhow::Result;
use bytes::{Buf, BufMut, Bytes, BytesMut};
use crate::iterator::Iterator;

mod block_iterator;
mod iterator;
mod sstable_builder;
pub type KVPair = (Bytes, Bytes);
/// # SSTable format
Expand Down Expand Up @@ -85,7 +86,7 @@ impl SSTable {
//read the index block
let buf = file_object.read(index_block_pointer.0, index_block_pointer.1)?;
let index_block_obj = Blocks::decode(buf.as_ref());
let mut record_iter = RecordIterator::new(&index_block_obj);
let mut record_iter = BlockRecordIterator::new(&index_block_obj);
while record_iter.is_valid() {
let record = IndexBlockRecord {
largest_key: record_iter.key().to_vec().clone(),
Expand All @@ -101,7 +102,7 @@ impl SSTable {
seq,
})
}
fn get(key: &[u8]) -> Result<KVPair> {
fn get(&self, key: &[u8]) -> Result<&[u8]> {
todo!()
}
}
Expand Down
15 changes: 6 additions & 9 deletions src/sstable/block_iterator.rs → src/sstable/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
use crate::sstable::SSTable;
use crate::util::Iterator;
pub struct BlockIterator<'a>{
use crate::iterator::Iterator;
/// Iterator over the records of an `SSTable`.
pub struct SSTableRecordIterator<'a>{
/// Table this iterator reads from.
sstable : &'a SSTable,
/// Validity flag; `new` initialises it to `false`.
is_valid: bool,

}

impl<'a> BlockIterator<'a> {
pub fn new( sstable : &'a SSTable) -> Self{
BlockIterator{
impl<'a> Iterator for SSTableRecordIterator<'a> {
type Item = &'a SSTable;
fn new( sstable : Self::Item) -> Self{
SSTableRecordIterator {
sstable,
is_valid: false
}
}
}

impl<'a> Iterator for BlockIterator<'a> {
fn seek_to_first(&mut self) {
todo!()
}
Expand Down
63 changes: 60 additions & 3 deletions src/sstable/sstable_builder.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,75 @@
use crate::sstable::SSTable;
use crate::blocks::{BlockBuilder, Blocks};
use crate::sstable::{BlockPointer, Footer, IndexBlockRecord, SSTable};
use crate::util::env::{get_global_sequence_number, sstfile_path, FileObject};
use anyhow::Result;

/// Size threshold, in bytes, that `SSTableBuilder::add` compares against
/// `approximate_size_after_add`.
pub const SSTABLE_SIZE_LIMIT: usize = 4 * 1024 * 1024; // 4MB
pub struct SSTableBuilder {}
pub struct SSTableBuilder {
data_blocks: Vec<Blocks>,
block_builder: BlockBuilder,
}

impl SSTableBuilder {
pub fn new() -> Self {
SSTableBuilder {
data_blocks: Vec::new(),
block_builder: BlockBuilder::new(),
}
}
/// Intended to estimate the table size after adding `key`/`value`; `add`
/// compares the result against `SSTABLE_SIZE_LIMIT`.
///
/// NOTE(review): not implemented yet — the body is `todo!()`, so this (and
/// therefore `add`) currently panics.
pub fn approximate_size_after_add(&self, key: &[u8], value: &[u8]) -> usize {
todo!()
}
/// Appends a key/value pair to the table under construction.
///
/// The pair goes into the current block when the estimated table size stays
/// within `SSTABLE_SIZE_LIMIT` and the block builder accepts it. Otherwise the
/// current block is finished, stored in `data_blocks`, the block builder is
/// reset, and the pair is added to the now-empty builder.
///
/// # Panics
///
/// Panics if the pair cannot be added even to a freshly reset block builder.
// TODO(summerxwu): Maybe need a return value to indicate the result
pub fn add(&mut self, key: &[u8], value: &[u8]) {
    let within_limit = self.approximate_size_after_add(key, value) <= SSTABLE_SIZE_LIMIT;
    // `&&` short-circuits: the block builder is only tried when the size check passes.
    if within_limit && self.block_builder.add(key, value).is_ok() {
        return;
    }
    // Finish the current data block and start a new one.
    let data_block_holder = self.block_builder.build();
    self.data_blocks.push(data_block_holder);
    self.block_builder.clean_up();
    // Add the failed KV pair again; panic if it still fails.
    self.block_builder
        .add(key, value)
        .expect("The build has already been reset, it should not failed to add content");
}
/// Builds the `SSTable` object and serializes its content to a disk file.
///
/// The finished data blocks are written first, then one encoded index record
/// per data block, and finally the footer.
///
/// NOTE(review): records still held by `self.block_builder` are not flushed
/// here (`&self` cannot finish the in-progress block) — confirm callers flush
/// it beforehand or change the receiver.
///
/// # Errors
///
/// Returns an error if the file cannot be created or a write fails.
pub fn build(&self) -> Result<SSTable> {
    let seq = get_global_sequence_number();
    let mut file_obj = FileObject::create(sstfile_path(seq).as_str())?;

    let mut indexes_records: Vec<IndexBlockRecord> = Vec::with_capacity(self.data_blocks.len());
    let mut offset_counter = 0;
    // Write data portion of SSTable.
    // Iterate by reference: `self` is only borrowed, so the blocks cannot be moved out.
    for data_block in &self.data_blocks {
        let buf = data_block.encode();

        let data_block_pointer = BlockPointer(offset_counter, buf.len());
        indexes_records.push(IndexBlockRecord {
            largest_key: data_block.largest_key().to_vec(),
            data_block_pointer,
        });

        file_obj.write(buf.as_ref())?;
        offset_counter += buf.len();
    }

    // Write index portion of SSTable.
    // Borrow the records so they are still available for the footer below.
    for indexes_record in &indexes_records {
        file_obj.write(indexes_record.encode().as_ref())?;
    }

    // Write Footer.
    // Take the count before the vector is moved into the footer.
    let num_of_index_block = indexes_records.len();
    let _footer = Footer {
        index_block_pointers: indexes_records,
        num_of_index_block,
    };

    // TODO: encode and write the footer, then return the `SSTable`.
    todo!()
}
fn evaluate_sstable_size(&self) -> usize {
Expand Down
42 changes: 33 additions & 9 deletions src/sstable/tests.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::sstable::iterator::SSTableRecordIterator;
use crate::sstable::sstable_builder::SSTableBuilder;
use crate::sstable::SSTable;
use crate::util::env::sstfile_path;
use std::fmt::format;
use std::fs;
use crate::sstable::block_iterator::BlockIterator;
use crate::iterator::Iterator;

struct TestSSTable {
sstable: SSTable,
Expand Down Expand Up @@ -34,16 +34,16 @@ impl Drop for TestSSTable {
}
/// Building a table that holds a single record must succeed.
#[test]
fn test_build_sstable_one_record() {
    TestSSTable::create_for_test(1, 1);
}
/// Building a table that holds many records must succeed.
#[test]
fn test_build_sstable_multi_records() {
    TestSSTable::create_for_test(1, 100);
}

/// A table that was just built can be opened again by its sequence number.
#[test]
fn test_open_exists_sstable() {
    let test_sstable = TestSSTable::create_for_test(1, 10);
    SSTable::open(test_sstable.sstable.seq).unwrap();
}
#[test]
Expand All @@ -53,10 +53,34 @@ fn test_open_non_exists_sstable() {
}
/// Iterating a reopened table yields its records in key order.
#[test]
fn test_sstable_iterator() {
    let test_sstable = TestSSTable::create_for_test(1, 100);
    let sstable = SSTable::open(test_sstable.sstable.seq).unwrap();
    let mut sstable_iter = SSTableRecordIterator::new(&sstable);
    sstable_iter.seek_to_first();
    assert_eq!(sstable_iter.key(), b"key_1".as_slice());
    sstable_iter.next();
    assert!(sstable_iter.is_valid());
    assert_eq!(sstable_iter.key(), b"key_2".as_slice());
    assert_eq!(sstable_iter.value(), b"value_2".as_slice());
}
/// Seeking to a stored key positions the iterator on that record.
#[test]
fn test_sstable_seek() {
    let test_sstable = TestSSTable::create_for_test(1, 100);
    let sstable = SSTable::open(test_sstable.sstable.seq).unwrap();
    let mut sstable_iter = SSTableRecordIterator::new(&sstable);
    sstable_iter.seek_to_first();
    assert_eq!(sstable_iter.key(), b"key_1".as_slice());
    sstable_iter.seek_to_key(b"key_53".as_slice());
    assert!(sstable_iter.is_valid());
    assert_eq!(sstable_iter.key(), b"key_53".as_slice());
    assert_eq!(sstable_iter.value(), b"value_53".as_slice());
}
/// `get` returns the value of a stored key and an error for a missing key.
#[test]
fn test_sstable_get() {
    let test_sstable = TestSSTable::create_for_test(1, 10);
    let sstable = SSTable::open(test_sstable.sstable.seq).unwrap();
    let value = sstable.get(b"key_5").unwrap();
    assert_eq!(value, b"value_5".as_slice());
    let value = sstable.get(b"key_not_found");
    assert!(value.is_err());
}
10 changes: 0 additions & 10 deletions src/util.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
pub mod error;
pub mod env;

// Cursor trait as it was declared in `util.rs`. `src/iterator.rs` declares a
// trait with the same eight methods plus an associated `Item` type and a
// `new` constructor.
pub trait Iterator {
fn seek_to_first(&mut self);
fn seek_to_last(&mut self);
fn seek_to_key(&mut self, key: &[u8])->bool;
fn is_valid(&self) -> bool;
fn next(&mut self);
fn prev(&mut self);
fn key(&self) -> &[u8];
fn value(&self) -> &[u8];
}

5 changes: 4 additions & 1 deletion src/util/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ impl FileObject {
self.size
}
}
/// Intended to return the path of the SSTable file for sequence number `seq`.
///
/// `seq` is a `usize`, matching the value `get_global_sequence_number`
/// returns and `SSTableBuilder::build` passes in.
///
/// NOTE(review): not implemented yet — the body is `todo!()` and panics.
pub fn sstfile_path(seq: usize) -> String {
    todo!()
}
/// Intended to return the sequence number used for the next SSTable file;
/// `SSTableBuilder::build` feeds the result to `sstfile_path`.
///
/// NOTE(review): not implemented yet — the body is `todo!()` and panics.
pub fn get_global_sequence_number() -> usize {
todo!()
}

0 comments on commit fdcde61

Please sign in to comment.