From 739e942b165dd7fae7888ea34656d79e584e045f Mon Sep 17 00:00:00 2001
From: Liquan Pei <liquanpei@gmail.com>
Date: Tue, 26 Mar 2024 17:01:24 -0700
Subject: [PATCH] [ENH] DataChunk abstraction with PullLog, Group and BF
 (#1929)

## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
	 - ...
 - New functionality
	 - This PR adds DataChunk abstraction
	 - PullLog, Group and BF are reimplemented with DataChunk
	 - Compaction orchestrator is implemented with DataChunk

## Test plan
*How are these changes tested?*

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js,
`cargo test` for rust

## Documentation Changes
*Are all docstrings for user-facing APIs updated if required? Do we need
to make documentation changes in the [docs
repository](https://github.com/chroma-core/docs)?*

---------

Co-authored-by: Hammad Bashir <HammadB@users.noreply.github.com>
---
 .vscode/settings.json                         |   2 +-
 rust/worker/src/compactor/scheduler.rs        |   8 +-
 rust/worker/src/execution/data/data_chunk.rs  | 170 ++++++++++++++
 rust/worker/src/execution/data/mod.rs         |   1 +
 rust/worker/src/execution/mod.rs              |   1 +
 .../execution/operators/brute_force_knn.rs    | 186 +++++++++++++--
 rust/worker/src/execution/operators/mod.rs    |   1 +
 .../src/execution/operators/partition.rs      | 221 ++++++++++++++++++
 .../src/execution/operators/pull_log.rs       |  29 +--
 .../src/execution/orchestration/compact.rs    | 221 ++++++++++++++++++
 .../src/execution/orchestration/hnsw.rs       |  21 +-
 .../worker/src/execution/orchestration/mod.rs |   2 +-
 rust/worker/src/log/log.rs                    |   8 +-
 rust/worker/src/types/embedding_record.rs     |   8 +-
 14 files changed, 816 insertions(+), 63 deletions(-)
 create mode 100644 rust/worker/src/execution/data/data_chunk.rs
 create mode 100644 rust/worker/src/execution/data/mod.rs
 create mode 100644 rust/worker/src/execution/operators/partition.rs
 create mode 100644 rust/worker/src/execution/orchestration/compact.rs

diff --git a/.vscode/settings.json b/.vscode/settings.json
index ccddc8d4c8c..a5def08ba63 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -128,4 +128,4 @@
     "unordered_set": "cpp",
     "algorithm": "cpp"
   },
-}
+}
\ No newline at end of file
diff --git a/rust/worker/src/compactor/scheduler.rs b/rust/worker/src/compactor/scheduler.rs
index bd51ed0320b..1ec961a077f 100644
--- a/rust/worker/src/compactor/scheduler.rs
+++ b/rust/worker/src/compactor/scheduler.rs
@@ -281,7 +281,7 @@ mod tests {
                 collection_id: collection_id_1.clone(),
                 log_id: 1,
                 log_id_ts: 1,
-                record: Box::new(EmbeddingRecord {
+                record: EmbeddingRecord {
                     id: "embedding_id_1".to_string(),
                     seq_id: BigInt::from(1),
                     embedding: None,
@@ -289,7 +289,7 @@ mod tests {
                     metadata: None,
                     operation: Operation::Add,
                     collection_id: collection_uuid_1,
-                }),
+                },
             }),
         );
 
@@ -301,7 +301,7 @@ mod tests {
                 collection_id: collection_id_2.clone(),
                 log_id: 2,
                 log_id_ts: 2,
-                record: Box::new(EmbeddingRecord {
+                record: EmbeddingRecord {
                     id: "embedding_id_2".to_string(),
                     seq_id: BigInt::from(2),
                     embedding: None,
@@ -309,7 +309,7 @@ mod tests {
                     metadata: None,
                     operation: Operation::Add,
                     collection_id: collection_uuid_2,
-                }),
+                },
             }),
         );
 
diff --git a/rust/worker/src/execution/data/data_chunk.rs b/rust/worker/src/execution/data/data_chunk.rs
new file mode 100644
index 00000000000..5f13d57cb2a
--- /dev/null
+++ b/rust/worker/src/execution/data/data_chunk.rs
@@ -0,0 +1,170 @@
+use std::sync::Arc;
+
+use crate::types::EmbeddingRecord;
+
+#[derive(Clone, Debug)]
+pub(crate) struct DataChunk {
+    data: Arc<[EmbeddingRecord]>,
+    visibility: Arc<[bool]>,
+}
+
+impl DataChunk {
+    pub fn new(data: Arc<[EmbeddingRecord]>) -> Self {
+        let len = data.len();
+        DataChunk {
+            data,
+            visibility: vec![true; len].into(),
+        }
+    }
+
+    /// Returns the total length of the data chunk
+    pub fn total_len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Returns the number of visible elements in the data chunk
+    pub fn len(&self) -> usize {
+        self.visibility.iter().filter(|&v| *v).count()
+    }
+
+    /// Returns the element at the given index
+    /// if the index is out of bounds, it returns None
+    /// # Arguments
+    /// * `index` - The index of the element
+    pub fn get(&self, index: usize) -> Option<&EmbeddingRecord> {
+        if index < self.data.len() {
+            Some(&self.data[index])
+        } else {
+            None
+        }
+    }
+
+    /// Returns the visibility of the element at the given index
+    /// if the index is out of bounds, it returns None
+    /// # Arguments
+    /// * `index` - The index of the element
+    pub fn get_visibility(&self, index: usize) -> Option<bool> {
+        if index < self.visibility.len() {
+            Some(self.visibility[index])
+        } else {
+            None
+        }
+    }
+
+    /// Sets the visibility of the elements in the data chunk.
+    /// Note that the length of the visibility vector should be
+    /// equal to the length of the data chunk.
+    ///
+    /// Note that this is the only way to change the visibility of the elements in the data chunk,
+    /// the data chunk does not provide a way to change the visibility of individual elements.
+    /// This is to ensure that the visibility of the elements is always in sync with the data.
+    /// If you want to change the visibility of individual elements, you should create a new data chunk.
+    ///
+    /// # Arguments
+    /// * `visibility` - A vector of boolean values indicating the visibility of the elements
+    pub fn set_visibility(&mut self, visibility: Vec<bool>) {
+        self.visibility = visibility.into();
+    }
+
+    /// Returns an iterator over the visible elements in the data chunk
+    /// The iterator returns a tuple of the element and its index
+    /// # Returns
+    /// An iterator over the visible elements in the data chunk
+    pub fn iter(&self) -> DataChunkIteraror<'_> {
+        DataChunkIteraror {
+            chunk: self,
+            index: 0,
+        }
+    }
+}
+
+pub(crate) struct DataChunkIteraror<'a> {
+    chunk: &'a DataChunk,
+    index: usize,
+}
+
+impl<'a> Iterator for DataChunkIteraror<'a> {
+    type Item = (&'a EmbeddingRecord, usize);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while self.index < self.chunk.total_len() {
+            let index = self.index;
+            match self.chunk.get_visibility(index) {
+                Some(true) => {
+                    self.index += 1;
+                    return self.chunk.get(index).map(|record| (record, index));
+                }
+                Some(false) => {
+                    self.index += 1;
+                }
+                None => {
+                    break;
+                }
+            }
+        }
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::EmbeddingRecord;
+    use crate::types::Operation;
+    use num_bigint::BigInt;
+    use std::str::FromStr;
+    use uuid::Uuid;
+
+    #[test]
+    fn test_data_chunk() {
+        let collection_uuid_1 = Uuid::from_str("00000000-0000-0000-0000-000000000001").unwrap();
+        let data = vec![
+            EmbeddingRecord {
+                id: "embedding_id_1".to_string(),
+                seq_id: BigInt::from(1),
+                embedding: None,
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: collection_uuid_1,
+            },
+            EmbeddingRecord {
+                id: "embedding_id_2".to_string(),
+                seq_id: BigInt::from(2),
+                embedding: None,
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: collection_uuid_1,
+            },
+        ];
+        let data = data.into();
+        let mut chunk = DataChunk::new(data);
+        assert_eq!(chunk.len(), 2);
+        let mut iter = chunk.iter();
+        let elem = iter.next();
+        assert_eq!(elem.is_some(), true);
+        let (record, index) = elem.unwrap();
+        assert_eq!(record.id, "embedding_id_1");
+        assert_eq!(index, 0);
+        let elem = iter.next();
+        assert_eq!(elem.is_some(), true);
+        let (record, index) = elem.unwrap();
+        assert_eq!(record.id, "embedding_id_2");
+        assert_eq!(index, 1);
+        let elem = iter.next();
+        assert_eq!(elem.is_none(), true);
+
+        let visibility = vec![true, false].into();
+        chunk.set_visibility(visibility);
+        assert_eq!(chunk.len(), 1);
+        let mut iter = chunk.iter();
+        let elem = iter.next();
+        assert_eq!(elem.is_some(), true);
+        let (record, index) = elem.unwrap();
+        assert_eq!(record.id, "embedding_id_1");
+        assert_eq!(index, 0);
+        let elem = iter.next();
+        assert_eq!(elem.is_none(), true);
+    }
+}
diff --git a/rust/worker/src/execution/data/mod.rs b/rust/worker/src/execution/data/mod.rs
new file mode 100644
index 00000000000..ecbe39f3445
--- /dev/null
+++ b/rust/worker/src/execution/data/mod.rs
@@ -0,0 +1 @@
+pub(crate) mod data_chunk;
diff --git a/rust/worker/src/execution/mod.rs b/rust/worker/src/execution/mod.rs
index 0000e23f3a3..1d361780d77 100644
--- a/rust/worker/src/execution/mod.rs
+++ b/rust/worker/src/execution/mod.rs
@@ -1,4 +1,5 @@
 pub(crate) mod config;
+mod data;
 pub(crate) mod dispatcher;
 pub(crate) mod operator;
 mod operators;
diff --git a/rust/worker/src/execution/operators/brute_force_knn.rs b/rust/worker/src/execution/operators/brute_force_knn.rs
index 3a9d05c8426..13e02dc9af9 100644
--- a/rust/worker/src/execution/operators/brute_force_knn.rs
+++ b/rust/worker/src/execution/operators/brute_force_knn.rs
@@ -1,6 +1,10 @@
+use crate::execution::data::data_chunk::DataChunk;
 use crate::{distance::DistanceFunction, execution::operator::Operator};
 use async_trait::async_trait;
-use std::cmp;
+use std::cmp::Ord;
+use std::cmp::Ordering;
+use std::cmp::PartialOrd;
+use std::collections::BinaryHeap;
 
 /// The brute force k-nearest neighbors operator is responsible for computing the k-nearest neighbors
 /// of a given query vector against a set of vectors using brute force calculation.
@@ -17,7 +21,7 @@ pub struct BruteForceKnnOperator {}
 /// * `distance_metric` - The distance metric to use.
 #[derive(Debug)]
 pub struct BruteForceKnnOperatorInput {
-    pub data: Vec<Vec<f32>>,
+    pub data: DataChunk,
     pub query: Vec<f32>,
     pub k: usize,
     pub distance_metric: DistanceFunction,
@@ -25,37 +29,95 @@ pub struct BruteForceKnnOperatorInput {
 
 /// The output of the brute force k-nearest neighbors operator.
 /// # Parameters
+/// * `data` - The vectors to query against. Only the vectors that are nearest neighbors are visible.
 /// * `indices` - The indices of the nearest neighbors. This is a mask against the `query_vecs` input.
 /// One row for each query vector.
 /// * `distances` - The distances of the nearest neighbors.
 /// One row for each query vector.
 #[derive(Debug)]
 pub struct BruteForceKnnOperatorOutput {
+    pub data: DataChunk,
     pub indices: Vec<usize>,
     pub distances: Vec<f32>,
 }
 
 pub type BruteForceKnnOperatorResult = Result<BruteForceKnnOperatorOutput, ()>;
 
+#[derive(Debug)]
+struct Entry {
+    index: usize,
+    distance: f32,
+}
+
+impl Ord for Entry {
+    fn cmp(&self, other: &Self) -> Ordering {
+        if self.distance == other.distance {
+            Ordering::Equal
+        } else if self.distance > other.distance {
+            // This is a min heap, so we need to reverse the ordering.
+            Ordering::Less
+        } else {
+            // This is a min heap, so we need to reverse the ordering.
+            Ordering::Greater
+        }
+    }
+}
+
+impl PartialOrd for Entry {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl PartialEq for Entry {
+    fn eq(&self, other: &Self) -> bool {
+        self.distance == other.distance
+    }
+}
+
+impl Eq for Entry {}
+
 #[async_trait]
 impl Operator<BruteForceKnnOperatorInput, BruteForceKnnOperatorOutput> for BruteForceKnnOperator {
     type Error = ();
 
     async fn run(&self, input: &BruteForceKnnOperatorInput) -> BruteForceKnnOperatorResult {
-        // We could use a heap approach here, but for now we just sort the distances and take the
-        // first k.
-        let mut sorted_indices_distances = input
-            .data
-            .iter()
-            .map(|data| input.distance_metric.distance(&input.query, data))
-            .enumerate()
-            .collect::<Vec<(usize, f32)>>();
-        sorted_indices_distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
-        let (sorted_indices, sorted_distances) = sorted_indices_distances
-            .drain(..cmp::min(input.k, input.data.len()))
-            .unzip();
+        let mut heap = BinaryHeap::with_capacity(input.k);
+        let data_chunk = &input.data;
+        for data in data_chunk.iter() {
+            let embedding_record = data.0;
+            let index = data.1;
 
+            let embedding = match &embedding_record.embedding {
+                Some(embedding) => embedding,
+                None => {
+                    continue;
+                }
+            };
+            let distance = input.distance_metric.distance(&embedding[..], &input.query);
+            heap.push(Entry { index, distance });
+        }
+
+        let mut visibility = vec![false; data_chunk.total_len()];
+        let mut sorted_indices = Vec::with_capacity(input.k);
+        let mut sorted_distances = Vec::with_capacity(input.k);
+        let mut i = 0;
+        while i < input.k {
+            let entry = match heap.pop() {
+                Some(entry) => entry,
+                None => {
+                    break;
+                }
+            };
+            sorted_indices.push(entry.index);
+            sorted_distances.push(entry.distance);
+            visibility[entry.index] = true;
+            i += 1;
+        }
+        let mut data_chunk = data_chunk.clone();
+        data_chunk.set_visibility(visibility);
         Ok(BruteForceKnnOperatorOutput {
+            data: data_chunk,
             indices: sorted_indices,
             distances: sorted_distances,
         })
@@ -64,17 +126,49 @@ impl Operator<BruteForceKnnOperatorInput, BruteForceKnnOperatorOutput> for Brute
 
 #[cfg(test)]
 mod tests {
+    use crate::types::EmbeddingRecord;
+    use crate::types::Operation;
+    use num_bigint::BigInt;
+    use uuid::Uuid;
+
     use super::*;
 
     #[tokio::test]
     async fn test_brute_force_knn_l2sqr() {
         let operator = BruteForceKnnOperator {};
+        let data = vec![
+            EmbeddingRecord {
+                id: "embedding_id_1".to_string(),
+                seq_id: BigInt::from(0),
+                embedding: Some(vec![0.0, 0.0, 0.0]),
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: Uuid::new_v4(),
+            },
+            EmbeddingRecord {
+                id: "embedding_id_2".to_string(),
+                seq_id: BigInt::from(1),
+                embedding: Some(vec![0.0, 1.0, 1.0]),
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: Uuid::new_v4(),
+            },
+            EmbeddingRecord {
+                id: "embedding_id_3".to_string(),
+                seq_id: BigInt::from(2),
+                embedding: Some(vec![7.0, 8.0, 9.0]),
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: Uuid::new_v4(),
+            },
+        ];
+        let data_chunk = DataChunk::new(data.into());
+
         let input = BruteForceKnnOperatorInput {
-            data: vec![
-                vec![0.0, 0.0, 0.0],
-                vec![0.0, 1.0, 1.0],
-                vec![7.0, 8.0, 9.0],
-            ],
+            data: data_chunk,
             query: vec![0.0, 0.0, 0.0],
             k: 2,
             distance_metric: DistanceFunction::Euclidean,
@@ -83,6 +177,9 @@ mod tests {
         assert_eq!(output.indices, vec![0, 1]);
         let distance_1 = 0.0_f32.powi(2) + 1.0_f32.powi(2) + 1.0_f32.powi(2);
         assert_eq!(output.distances, vec![0.0, distance_1]);
+        assert_eq!(output.data.get_visibility(0), Some(true));
+        assert_eq!(output.data.get_visibility(1), Some(true));
+        assert_eq!(output.data.get_visibility(2), Some(false));
     }
 
     #[tokio::test]
@@ -95,8 +192,39 @@ mod tests {
         let norm_2 = (0.0_f32.powi(2) + -1.0_f32.powi(2) + 6.0_f32.powi(2)).sqrt();
         let data_2 = vec![0.0 / norm_2, -1.0 / norm_2, 6.0 / norm_2];
 
+        let data = vec![
+            EmbeddingRecord {
+                id: "embedding_id_1".to_string(),
+                seq_id: BigInt::from(0),
+                embedding: Some(vec![0.0, 1.0, 0.0]),
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: Uuid::new_v4(),
+            },
+            EmbeddingRecord {
+                id: "embedding_id_2".to_string(),
+                seq_id: BigInt::from(1),
+                embedding: Some(data_1.clone()),
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: Uuid::new_v4(),
+            },
+            EmbeddingRecord {
+                id: "embedding_id_3".to_string(),
+                seq_id: BigInt::from(2),
+                embedding: Some(data_2.clone()),
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: Uuid::new_v4(),
+            },
+        ];
+        let data_chunk = DataChunk::new(data.into());
+
         let input = BruteForceKnnOperatorInput {
-            data: vec![vec![0.0, 1.0, 0.0], data_1.clone(), data_2.clone()],
+            data: data_chunk,
             query: vec![0.0, 1.0, 0.0],
             k: 2,
             distance_metric: DistanceFunction::InnerProduct,
@@ -107,14 +235,29 @@ mod tests {
         let expected_distance_1 =
             1.0f32 - ((data_1[0] * 0.0) + (data_1[1] * 1.0) + (data_1[2] * 0.0));
         assert_eq!(output.distances, vec![0.0, expected_distance_1]);
+        assert_eq!(output.data.get_visibility(0), Some(true));
+        assert_eq!(output.data.get_visibility(1), Some(true));
+        assert_eq!(output.data.get_visibility(2), Some(false));
     }
 
     #[tokio::test]
     async fn test_data_less_than_k() {
         // If we have less data than k, we should return all the data, sorted by distance.
         let operator = BruteForceKnnOperator {};
+        let data = vec![EmbeddingRecord {
+            id: "embedding_id_1".to_string(),
+            seq_id: BigInt::from(0),
+            embedding: Some(vec![0.0, 0.0, 0.0]),
+            encoding: None,
+            metadata: None,
+            operation: Operation::Add,
+            collection_id: Uuid::new_v4(),
+        }];
+
+        let data_chunk = DataChunk::new(data.into());
+
         let input = BruteForceKnnOperatorInput {
-            data: vec![vec![0.0, 0.0, 0.0]],
+            data: data_chunk,
             query: vec![0.0, 0.0, 0.0],
             k: 2,
             distance_metric: DistanceFunction::Euclidean,
@@ -122,5 +265,6 @@ mod tests {
         let output = operator.run(&input).await.unwrap();
         assert_eq!(output.indices, vec![0]);
         assert_eq!(output.distances, vec![0.0]);
+        assert_eq!(output.data.get_visibility(0), Some(true));
     }
 }
diff --git a/rust/worker/src/execution/operators/mod.rs b/rust/worker/src/execution/operators/mod.rs
index 60198481545..ed31dca33db 100644
--- a/rust/worker/src/execution/operators/mod.rs
+++ b/rust/worker/src/execution/operators/mod.rs
@@ -1,3 +1,4 @@
 pub(super) mod brute_force_knn;
 pub(super) mod normalize_vectors;
+pub(super) mod partition;
 pub(super) mod pull_log;
diff --git a/rust/worker/src/execution/operators/partition.rs b/rust/worker/src/execution/operators/partition.rs
new file mode 100644
index 00000000000..e32b693ff73
--- /dev/null
+++ b/rust/worker/src/execution/operators/partition.rs
@@ -0,0 +1,221 @@
+use crate::errors::{ChromaError, ErrorCodes};
+use crate::execution::data::data_chunk::DataChunk;
+use crate::execution::operator::Operator;
+use async_trait::async_trait;
+use std::collections::HashMap;
+use thiserror::Error;
+
+#[derive(Debug)]
+/// The partition Operator takes a DataChunk and presents a copy-free
+/// view of N partitions by breaking the data into partitions by max_partition_size. It will group operations 
+/// on the same key into the same partition. Due to this, the max_partition_size is a 
+/// soft-limit, since if there are more operations to a key than max_partition_size we cannot
+/// partition the data.
+pub struct PartitionOperator {}
+
+/// The input to the partition operator.
+/// # Parameters
+/// * `records` - The records to partition.
+#[derive(Debug)]
+pub struct PartitionInput {
+    pub(crate) records: DataChunk,
+    pub(crate) max_partition_size: usize,
+}
+
+impl PartitionInput {
+    /// Create a new partition input.
+    /// # Parameters
+    /// * `records` - The records to partition.
+    /// * `max_partition_size` - The maximum size of a partition. Since we are trying to
+    /// partition the records by id, which can casue the partition size to be larger than this
+    /// value.
+    pub fn new(records: DataChunk, max_partition_size: usize) -> Self {
+        PartitionInput {
+            records,
+            max_partition_size,
+        }
+    }
+}
+
+/// The output of the partition operator.
+/// # Parameters
+/// * `records` - The partitioned records.
+#[derive(Debug)]
+pub struct PartitionOutput {
+    pub(crate) records: Vec<DataChunk>,
+}
+
+#[derive(Debug, Error)]
+pub enum PartitionError {
+    #[error("Failed to partition records.")]
+    PartitionError,
+}
+
+impl ChromaError for PartitionError {
+    fn code(&self) -> ErrorCodes {
+        match self {
+            PartitionError::PartitionError => ErrorCodes::Internal,
+        }
+    }
+}
+
+pub type PartitionResult = Result<PartitionOutput, PartitionError>;
+
+impl PartitionOperator {
+    pub fn new() -> Box<Self> {
+        Box::new(PartitionOperator {})
+    }
+
+    pub fn partition(&self, records: &DataChunk, partition_size: usize) -> Vec<DataChunk> {
+        let mut map = HashMap::new();
+        for data in records.iter() {
+            let record = data.0;
+            let index = data.1;
+            let key = record.id.clone();
+            map.entry(key).or_insert_with(Vec::new).push(index);
+        }
+        let mut result = Vec::new();
+        // Create a new DataChunk for each parition of records with partition_size without
+        // data copying.
+        let mut current_batch_size = 0;
+        let mut new_partition = true;
+        let mut visibility = vec![false; records.total_len()];
+        for (_, v) in map.iter() {
+            // create DataChunk with partition_size by masking the visibility of the records
+            // in the partition.
+            if new_partition {
+                visibility = vec![false; records.total_len()];
+                new_partition = false;
+            }
+            for i in v.iter() {
+                visibility[*i] = true;
+            }
+            current_batch_size += v.len();
+            if current_batch_size >= partition_size {
+                let mut new_data_chunk = records.clone();
+                new_data_chunk.set_visibility(visibility.clone());
+                result.push(new_data_chunk);
+                new_partition = true;
+                current_batch_size = 0;
+            }
+        }
+        // handle the case that the last group is smaller than the group_size.
+        if !new_partition {
+            let mut new_data_chunk = records.clone();
+            new_data_chunk.set_visibility(visibility.clone());
+            result.push(new_data_chunk);
+        }
+        result
+    }
+
+    fn determine_partition_size(&self, num_records: usize, threshold: usize) -> usize {
+        if num_records < threshold {
+            return num_records;
+        } else {
+            return threshold;
+        }
+    }
+}
+
+#[async_trait]
+impl Operator<PartitionInput, PartitionOutput> for PartitionOperator {
+    type Error = PartitionError;
+
+    async fn run(&self, input: &PartitionInput) -> PartitionResult {
+        let records = &input.records;
+        let partition_size = self.determine_partition_size(records.len(), input.max_partition_size);
+        let deduped_records = self.partition(records, partition_size);
+        return Ok(PartitionOutput {
+            records: deduped_records,
+        });
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::EmbeddingRecord;
+    use crate::types::Operation;
+    use num_bigint::BigInt;
+    use std::str::FromStr;
+    use std::sync::Arc;
+    use uuid::Uuid;
+
+    #[tokio::test]
+    async fn test_partition_operator() {
+        let collection_uuid_1 = Uuid::from_str("00000000-0000-0000-0000-000000000001").unwrap();
+        let collection_uuid_2 = Uuid::from_str("00000000-0000-0000-0000-000000000002").unwrap();
+        let data = vec![
+            EmbeddingRecord {
+                id: "embedding_id_1".to_string(),
+                seq_id: BigInt::from(1),
+                embedding: None,
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: collection_uuid_1,
+            },
+            EmbeddingRecord {
+                id: "embedding_id_2".to_string(),
+                seq_id: BigInt::from(2),
+                embedding: None,
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: collection_uuid_1,
+            },
+            EmbeddingRecord {
+                id: "embedding_id_1".to_string(),
+                seq_id: BigInt::from(3),
+                embedding: None,
+                encoding: None,
+                metadata: None,
+                operation: Operation::Add,
+                collection_id: collection_uuid_2,
+            },
+        ];
+        let data: Arc<[EmbeddingRecord]> = data.into();
+
+        // Test group size is larger than the number of records
+        let chunk = DataChunk::new(data.clone());
+        let operator = PartitionOperator::new();
+        let input = PartitionInput::new(chunk, 4);
+        let result = operator.run(&input).await.unwrap();
+        assert_eq!(result.records.len(), 1);
+        assert_eq!(result.records[0].len(), 3);
+
+        // Test group size is the same as the number of records
+        let chunk = DataChunk::new(data.clone());
+        let operator = PartitionOperator::new();
+        let input = PartitionInput::new(chunk, 3);
+        let result = operator.run(&input).await.unwrap();
+        assert_eq!(result.records.len(), 1);
+        assert_eq!(result.records[0].len(), 3);
+
+        // Test group size is smaller than the number of records
+        let chunk = DataChunk::new(data.clone());
+        let operator = PartitionOperator::new();
+        let input = PartitionInput::new(chunk, 2);
+        let mut result = operator.run(&input).await.unwrap();
+
+        // The result can be 1 or 2 groups depending on the order of the records.
+        assert!(result.records.len() == 2 || result.records.len() == 1);
+        if result.records.len() == 2 {
+            result.records.sort_by(|a, b| a.len().cmp(&b.len()));
+            assert_eq!(result.records[0].len(), 1);
+            assert_eq!(result.records[1].len(), 2);
+        } else {
+            assert_eq!(result.records[0].len(), 3);
+        }
+
+        // Test group size is smaller than the number of records
+        let chunk = DataChunk::new(data.clone());
+        let operator = PartitionOperator::new();
+        let input = PartitionInput::new(chunk, 1);
+        let mut result = operator.run(&input).await.unwrap();
+        assert_eq!(result.records.len(), 2);
+        result.records.sort_by(|a, b| a.len().cmp(&b.len()));
+        assert_eq!(result.records[0].len(), 1);
+        assert_eq!(result.records[1].len(), 2);
+    }
+}
diff --git a/rust/worker/src/execution/operators/pull_log.rs b/rust/worker/src/execution/operators/pull_log.rs
index ad564662654..d0a9cc7ae61 100644
--- a/rust/worker/src/execution/operators/pull_log.rs
+++ b/rust/worker/src/execution/operators/pull_log.rs
@@ -1,8 +1,7 @@
-use crate::{
-    execution::operator::Operator,
-    log::log::{Log, PullLogsError},
-    types::EmbeddingRecord,
-};
+use crate::execution::data::data_chunk::DataChunk;
+use crate::execution::operator::Operator;
+use crate::log::log::Log;
+use crate::log::log::PullLogsError;
 use async_trait::async_trait;
 use uuid::Uuid;
 
@@ -65,22 +64,22 @@ impl PullLogsInput {
 /// The output of the pull logs operator.
 #[derive(Debug)]
 pub struct PullLogsOutput {
-    logs: Vec<Box<EmbeddingRecord>>,
+    logs: DataChunk,
 }
 
 impl PullLogsOutput {
     /// Create a new pull logs output.
     /// # Parameters
     /// * `logs` - The logs that were read.
-    pub fn new(logs: Vec<Box<EmbeddingRecord>>) -> Self {
+    pub fn new(logs: DataChunk) -> Self {
         PullLogsOutput { logs }
     }
 
     /// Get the log entries that were read by an invocation of the pull logs operator.
     /// # Returns
     /// The log entries that were read.
-    pub fn logs(&self) -> &Vec<Box<EmbeddingRecord>> {
-        &self.logs
+    pub fn logs(&self) -> DataChunk {
+        self.logs.clone()
     }
 }
 
@@ -139,7 +138,9 @@ impl Operator<PullLogsInput, PullLogsOutput> for PullLogsOperator {
         if input.num_records.is_some() && result.len() > input.num_records.unwrap() as usize {
             result.truncate(input.num_records.unwrap() as usize);
         }
-        Ok(PullLogsOutput::new(result))
+        // Convert to DataChunk
+        let data_chunk = DataChunk::new(result.into());
+        Ok(PullLogsOutput::new(data_chunk))
     }
 }
 
@@ -166,7 +167,7 @@ mod tests {
                 collection_id: collection_id_1.clone(),
                 log_id: 1,
                 log_id_ts: 1,
-                record: Box::new(EmbeddingRecord {
+                record: EmbeddingRecord {
                     id: "embedding_id_1".to_string(),
                     seq_id: BigInt::from(1),
                     embedding: None,
@@ -174,7 +175,7 @@ mod tests {
                     metadata: None,
                     operation: Operation::Add,
                     collection_id: collection_uuid_1,
-                }),
+                },
             }),
         );
         log.add_log(
@@ -183,7 +184,7 @@ mod tests {
                 collection_id: collection_id_1.clone(),
                 log_id: 2,
                 log_id_ts: 2,
-                record: Box::new(EmbeddingRecord {
+                record: EmbeddingRecord {
                     id: "embedding_id_2".to_string(),
                     seq_id: BigInt::from(2),
                     embedding: None,
@@ -191,7 +192,7 @@ mod tests {
                     metadata: None,
                     operation: Operation::Add,
                     collection_id: collection_uuid_1,
-                }),
+                },
             }),
         );
 
diff --git a/rust/worker/src/execution/orchestration/compact.rs b/rust/worker/src/execution/orchestration/compact.rs
new file mode 100644
index 00000000000..bce69aa59e5
--- /dev/null
+++ b/rust/worker/src/execution/orchestration/compact.rs
@@ -0,0 +1,221 @@
+use super::super::operator::{wrap, TaskMessage};
+use crate::errors::ChromaError;
+use crate::execution::data::data_chunk::DataChunk;
+use crate::execution::operators::partition::PartitionInput;
+use crate::execution::operators::partition::PartitionOperator;
+use crate::execution::operators::partition::PartitionResult;
+use crate::execution::operators::pull_log::PullLogsInput;
+use crate::execution::operators::pull_log::PullLogsOperator;
+use crate::execution::operators::pull_log::PullLogsResult;
+use crate::log::log::Log;
+use crate::sysdb::sysdb::SysDb;
+use crate::system::Component;
+use crate::system::Handler;
+use crate::system::Receiver;
+use crate::system::System;
+use async_trait::async_trait;
+use std::time::SystemTime;
+use std::time::UNIX_EPOCH;
+use uuid::Uuid;
+
+/**  The state of the orchestrator.
+In chroma, we have a relatively fixed number of query plans that we can execute. Rather
+than a flexible state machine abstraction, we just manually define the states that we
+expect to encounter for a given query plan. This is a bit more rigid, but it's also simpler and easier to
+understand. We can always add more abstraction later if we need it.
+```plaintext
+
+                               ┌───► Write─────-------┐
+                               │                      │
+  Pending ─► PullLogs ─► Group │                      ├─► Flush ─► Finished
+                               │                      │
+                               └───► Write ───────────┘
+
+```
+*/
+#[derive(Debug)]
+enum ExecutionState {
+    Pending,
+    PullLogs,
+    Partition,
+    Write,
+    Flush,
+    Finished,
+}
+
+#[derive(Debug)]
+pub struct CompactOrchestrator {
+    state: ExecutionState,
+    // Component Execution
+    system: System,
+    segment_id: Uuid,
+    // Dependencies
+    log: Box<dyn Log>,
+    sysdb: Box<dyn SysDb>,
+    // Dispatcher
+    dispatcher: Box<dyn Receiver<TaskMessage>>,
+    // Result Channel
+    result_channel: Option<tokio::sync::oneshot::Sender<Result<(), Box<dyn ChromaError>>>>,
+}
+
+impl CompactOrchestrator {
+    pub fn new(
+        system: System,
+        segment_id: Uuid,
+        log: Box<dyn Log>,
+        sysdb: Box<dyn SysDb>,
+        dispatcher: Box<dyn Receiver<TaskMessage>>,
+        result_channel: Option<tokio::sync::oneshot::Sender<Result<(), Box<dyn ChromaError>>>>,
+    ) -> Self {
+        CompactOrchestrator {
+            state: ExecutionState::Pending,
+            system,
+            segment_id,
+            log,
+            sysdb,
+            dispatcher,
+            result_channel,
+        }
+    }
+
+    /// Get the collection id for a segment id.
+    /// TODO: This can be cached
+    async fn get_collection_id_for_segment_id(&mut self, segment_id: Uuid) -> Option<Uuid> {
+        let segments = self
+            .sysdb
+            .get_segments(Some(segment_id), None, None, None, None)
+            .await;
+        match segments {
+            Ok(segments) => match segments.get(0) {
+                Some(segment) => segment.collection,
+                None => None,
+            },
+            Err(e) => {
+                // Log an error and return
+                return None;
+            }
+        }
+    }
+
+    async fn pull_logs(&mut self, self_address: Box<dyn Receiver<PullLogsResult>>) {
+        self.state = ExecutionState::PullLogs;
+        let operator = PullLogsOperator::new(self.log.clone());
+        let collection_id = match self.get_collection_id_for_segment_id(self.segment_id).await {
+            Some(collection_id) => collection_id,
+            None => {
+                // Log an error and reply + return
+                return;
+            }
+        };
+        let end_timestamp = SystemTime::now().duration_since(UNIX_EPOCH);
+        let end_timestamp = match end_timestamp {
+            // TODO: change protobuf definition to use u64 instead of i64
+            Ok(end_timestamp) => end_timestamp.as_secs() as i64,
+            Err(e) => {
+                // Log an error and reply + return
+                return;
+            }
+        };
+        let input = PullLogsInput::new(collection_id, 0, 100, None, Some(end_timestamp));
+        let task = wrap(operator, input, self_address);
+        match self.dispatcher.send(task).await {
+            Ok(_) => (),
+            Err(e) => {
+                // TODO: log an error and reply to caller
+            }
+        }
+    }
+
+    async fn group(
+        &mut self,
+        records: DataChunk,
+        self_address: Box<dyn Receiver<PartitionResult>>,
+    ) {
+        self.state = ExecutionState::Partition;
+        // TODO: make this configurable
+        let max_partition_size = 100;
+        let operator = PartitionOperator::new();
+        let input = PartitionInput::new(records, max_partition_size);
+        let task = wrap(operator, input, self_address);
+        match self.dispatcher.send(task).await {
+            Ok(_) => (),
+            Err(e) => {
+                // TODO: log an error and reply to caller
+            }
+        }
+    }
+
+    async fn write(&mut self, records: Vec<DataChunk>) {
+        self.state = ExecutionState::Write;
+
+        for record in records {
+            // TODO: implement write
+        }
+    }
+}
+
+// ============== Component Implementation ==============
+
+#[async_trait]
+impl Component for CompactOrchestrator {
+    fn queue_size(&self) -> usize {
+        1000 // TODO: make configurable
+    }
+
+    async fn on_start(&mut self, ctx: &crate::system::ComponentContext<Self>) -> () {
+        self.pull_logs(ctx.sender.as_receiver()).await;
+    }
+}
+
+// ============== Handlers ==============
+#[async_trait]
+impl Handler<PullLogsResult> for CompactOrchestrator {
+    async fn handle(
+        &mut self,
+        message: PullLogsResult,
+        ctx: &crate::system::ComponentContext<CompactOrchestrator>,
+    ) {
+        let records = match message {
+            Ok(result) => result.logs(),
+            Err(e) => {
+                // Log an error and return
+                let result_channel = match self.result_channel.take() {
+                    Some(tx) => tx,
+                    None => {
+                        // Log an error
+                        return;
+                    }
+                };
+                let _ = result_channel.send(Err(Box::new(e)));
+                return;
+            }
+        };
+        self.group(records, ctx.sender.as_receiver()).await;
+    }
+}
+
+#[async_trait]
+impl Handler<PartitionResult> for CompactOrchestrator {
+    async fn handle(
+        &mut self,
+        message: PartitionResult,
+        ctx: &crate::system::ComponentContext<CompactOrchestrator>,
+    ) {
+        let records = match message {
+            Ok(result) => result.records,
+            Err(e) => {
+                // Log an error and return
+                let result_channel = match self.result_channel.take() {
+                    Some(tx) => tx,
+                    None => {
+                        // Log an error
+                        return;
+                    }
+                };
+                let _ = result_channel.send(Err(Box::new(e)));
+                return;
+            }
+        };
+        // TODO: implement write records
+    }
+}
diff --git a/rust/worker/src/execution/orchestration/hnsw.rs b/rust/worker/src/execution/orchestration/hnsw.rs
index 699bfde4b3f..579497cc03b 100644
--- a/rust/worker/src/execution/orchestration/hnsw.rs
+++ b/rust/worker/src/execution/orchestration/hnsw.rs
@@ -1,11 +1,9 @@
 use super::super::operator::{wrap, TaskMessage};
-use super::super::operators::pull_log::{PullLogsInput, PullLogsOperator, PullLogsOutput};
-use crate::distance;
+use super::super::operators::pull_log::{PullLogsInput, PullLogsOperator};
 use crate::distance::DistanceFunction;
 use crate::errors::ChromaError;
 use crate::execution::operators::brute_force_knn::{
-    BruteForceKnnOperator, BruteForceKnnOperatorInput, BruteForceKnnOperatorOutput,
-    BruteForceKnnOperatorResult,
+    BruteForceKnnOperator, BruteForceKnnOperatorInput, BruteForceKnnOperatorResult,
 };
 use crate::execution::operators::pull_log::PullLogsResult;
 use crate::sysdb::sysdb::SysDb;
@@ -30,7 +28,7 @@ understand. We can always add more abstraction later if we need it.
 
                                ┌───► Brute Force ─────┐
                                │                      │
-  Pending ─► PullLogs ─► Dedupe│                      ├─► MergeResults ─► Finished
+  Pending ─► PullLogs ─► Group│                       ├─► MergeResults ─► Finished
                                │                      │
                                └───► HNSW ────────────┘
 
@@ -40,7 +38,7 @@ understand. We can always add more abstraction later if we need it.
 enum ExecutionState {
     Pending,
     PullLogs,
-    Dedupe,
+    Partition,
     QueryKnn,
     MergeResults,
     Finished,
@@ -142,7 +140,7 @@ impl HnswQueryOrchestrator {
     ///  Run the orchestrator and return the result.
     ///  # Note
     ///  Use this over spawning the component directly. This method will start the component and
-    /// wait for it to finish before returning the result.
+    ///  wait for it to finish before returning the result.
     pub(crate) async fn run(mut self) -> Result<Vec<Vec<VectorQueryResult>>, Box<dyn ChromaError>> {
         let (tx, rx) = tokio::sync::oneshot::channel();
         self.result_channel = Some(tx);
@@ -175,19 +173,14 @@ impl Handler<PullLogsResult> for HnswQueryOrchestrator {
         message: PullLogsResult,
         ctx: &crate::system::ComponentContext<HnswQueryOrchestrator>,
     ) {
-        self.state = ExecutionState::Dedupe;
+        self.state = ExecutionState::Partition;
 
         // TODO: implement the remaining state transitions and operators
         // TODO: don't need all this cloning and data shuffling, once we land the chunk abstraction
-        let mut dataset = Vec::new();
         match message {
             Ok(logs) => {
-                for log in logs.logs().iter() {
-                    // TODO: only adds have embeddings, unwrap is fine for now
-                    dataset.push(log.embedding.clone().unwrap());
-                }
                 let bf_input = BruteForceKnnOperatorInput {
-                    data: dataset,
+                    data: logs.logs(),
                     query: self.query_vectors[0].clone(),
                     k: self.k as usize,
                     distance_metric: DistanceFunction::Euclidean,
diff --git a/rust/worker/src/execution/orchestration/mod.rs b/rust/worker/src/execution/orchestration/mod.rs
index 902c3eaf84d..2828cfd365a 100644
--- a/rust/worker/src/execution/orchestration/mod.rs
+++ b/rust/worker/src/execution/orchestration/mod.rs
@@ -1,3 +1,3 @@
+mod compact;
 mod hnsw;
-
 pub(crate) use hnsw::*;
diff --git a/rust/worker/src/log/log.rs b/rust/worker/src/log/log.rs
index 5dae688e6d3..0f1c5c6c16c 100644
--- a/rust/worker/src/log/log.rs
+++ b/rust/worker/src/log/log.rs
@@ -38,7 +38,7 @@ pub(crate) trait Log: Send + Sync + LogClone + Debug {
         offset: i64,
         batch_size: i32,
         end_timestamp: Option<i64>,
-    ) -> Result<Vec<Box<EmbeddingRecord>>, PullLogsError>;
+    ) -> Result<Vec<EmbeddingRecord>, PullLogsError>;
 
     async fn get_collections_with_new_data(
         &mut self,
@@ -121,7 +121,7 @@ impl Log for GrpcLog {
         offset: i64,
         batch_size: i32,
         end_timestamp: Option<i64>,
-    ) -> Result<Vec<Box<EmbeddingRecord>>, PullLogsError> {
+    ) -> Result<Vec<EmbeddingRecord>, PullLogsError> {
         let end_timestamp = match end_timestamp {
             Some(end_timestamp) => end_timestamp,
             None => -1,
@@ -227,7 +227,7 @@ pub(crate) struct LogRecord {
     pub(crate) collection_id: String,
     pub(crate) log_id: i64,
     pub(crate) log_id_ts: i64,
-    pub(crate) record: Box<EmbeddingRecord>,
+    pub(crate) record: EmbeddingRecord,
 }
 
 impl Debug for LogRecord {
@@ -268,7 +268,7 @@ impl Log for InMemoryLog {
         offset: i64,
         batch_size: i32,
         end_timestamp: Option<i64>,
-    ) -> Result<Vec<Box<EmbeddingRecord>>, PullLogsError> {
+    ) -> Result<Vec<EmbeddingRecord>, PullLogsError> {
         let end_timestamp = match end_timestamp {
             Some(end_timestamp) => end_timestamp,
             None => i64::MAX,
diff --git a/rust/worker/src/types/embedding_record.rs b/rust/worker/src/types/embedding_record.rs
index cc53631d4bf..6ba9bdf255a 100644
--- a/rust/worker/src/types/embedding_record.rs
+++ b/rust/worker/src/types/embedding_record.rs
@@ -101,7 +101,7 @@ impl TryFrom<SubmitEmbeddingRecordWithSeqId> for EmbeddingRecord {
     }
 }
 
-impl TryFrom<RecordLog> for Box<EmbeddingRecord> {
+impl TryFrom<RecordLog> for EmbeddingRecord {
     type Error = EmbeddingRecordConversionError;
 
     fn try_from(record_log: RecordLog) -> Result<Self, Self::Error> {
@@ -143,7 +143,7 @@ impl TryFrom<RecordLog> for Box<EmbeddingRecord> {
             None => None,
         };
 
-        Ok(Box::new(EmbeddingRecord {
+        Ok(EmbeddingRecord {
             id: proto_submit.id,
             seq_id: seq_id,
             embedding: embedding,
@@ -151,7 +151,7 @@ impl TryFrom<RecordLog> for Box<EmbeddingRecord> {
             metadata: metadata,
             operation: op,
             collection_id: collection_uuid,
-        }))
+        })
     }
 }
 
@@ -364,7 +364,7 @@ mod tests {
             log_id: 42,
             record: Some(proto_submit),
         };
-        let converted_embedding_record = Box::<EmbeddingRecord>::try_from(record_log).unwrap();
+        let converted_embedding_record = EmbeddingRecord::try_from(record_log).unwrap();
         assert_eq!(converted_embedding_record.id, Uuid::nil().to_string());
         assert_eq!(converted_embedding_record.seq_id, BigInt::from(42));
         assert_eq!(