Skip to content

Commit

Permalink
Add comments for rank
Browse files Browse the repository at this point in the history
  • Loading branch information
Sicheng Pan committed Dec 20, 2024
1 parent 72207e1 commit c6192f7
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
2 changes: 2 additions & 0 deletions rust/blockstore/src/arrow/blockfile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,8 @@ impl<'me, K: ArrowReadableKey<'me> + Into<KeyWrapper>, V: ArrowReadableValue<'me
self.root.id
}

/// Returns the number of elements strictly less than the given prefix-key pair in the blockfile.
/// In other words, the rank is the position where the given prefix-key pair can be inserted while maintaining the order of the blockfile.
pub(crate) async fn rank(
&'me self,
prefix: &'me str,
Expand Down
9 changes: 5 additions & 4 deletions rust/worker/src/segment/record_segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -828,8 +828,7 @@ impl RecordSegmentReader<'_> {
self.id_to_data.contains("", offset_id).await
}

/// Returns all data in the record segment, sorted by
/// embedding id
/// Returns all data in the record segment, sorted by their offset ids
#[allow(dead_code)]
pub(crate) async fn get_all_data(&self) -> Result<Vec<DataRecord>, Box<dyn ChromaError>> {
self.id_to_data
Expand All @@ -838,6 +837,7 @@ impl RecordSegmentReader<'_> {
.map(|vec| vec.into_iter().map(|(_, data)| data).collect())
}

/// Get a stream of offset ids from the smallest to the largest in the given range
pub(crate) fn get_offset_stream<'me>(
&'me self,
offset_range: impl RangeBounds<u32> + Clone + Send + 'me,
Expand All @@ -847,8 +847,9 @@ impl RecordSegmentReader<'_> {
.map(|res| res.map(|(offset_id, _)| offset_id))
}

// Find the rank of the given offset id in the record segment
// The implementation is based on std binary search
/// Find the rank of the given offset id in the record segment.
/// The rank of an offset id is the number of offset ids strictly smaller than it.
/// In other words, it is the position where the given offset id can be inserted without breaking the order.
pub(crate) async fn get_offset_id_rank(
&self,
target_oid: u32,
Expand Down

0 comments on commit c6192f7

Please sign in to comment.