Skip to content

Commit

Permalink
[ENH] Deprecate FetchSegmentOperator (#3261)
Browse files Browse the repository at this point in the history
## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
   - N/A
 - New functionality
   - Deprecate `FetchSegmentOperator` since the query node already receives the full collection information from the frontend.

## Test plan
*How are these changes tested?*

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

## Documentation Changes
*Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs repository](https://github.com/chroma-core/docs)?*
N/A
  • Loading branch information
Sicheng-Pan authored Dec 17, 2024
1 parent 094424a commit c7c4dda
Show file tree
Hide file tree
Showing 14 changed files with 307 additions and 648 deletions.
3 changes: 0 additions & 3 deletions chromadb/proto/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,9 +573,6 @@ def to_proto_where_document(where_document: WhereDocument) -> chroma_pb.WhereDoc
def to_proto_scan(scan: Scan) -> query_pb.ScanOperator:
return query_pb.ScanOperator(
collection=to_proto_collection(scan.collection),
knn_id=scan.knn["id"].hex,
metadata_id=scan.metadata["id"].hex,
record_id=scan.record["id"].hex,
knn=to_proto_segment(scan.knn),
metadata=to_proto_segment(scan.metadata),
record=to_proto_segment(scan.record),
Expand Down
64 changes: 32 additions & 32 deletions chromadb/proto/query_executor_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions chromadb/proto/query_executor_pb2.pyi

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 2 additions & 6 deletions idl/chromadb/proto/query_executor.proto
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,8 @@ import "chromadb/proto/chroma.proto";

message ScanOperator {
Collection collection = 1;
// Deprecated
string knn_id = 2;
// Deprecated
string metadata_id = 3;
// Deprecated
string record_id = 4;
// Reserve for deprecated fields
reserved 2, 3, 4;
Segment knn = 5;
Segment metadata = 6;
Segment record = 7;
Expand Down
39 changes: 38 additions & 1 deletion rust/types/src/collection.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::{Metadata, MetadataValueConversionError};
use crate::chroma_proto;
use crate::{chroma_proto, ConversionError, Segment};
use chroma_error::{ChromaError, ErrorCodes};
use thiserror::Error;
use uuid::Uuid;
Expand Down Expand Up @@ -89,6 +89,43 @@ impl TryFrom<chroma_proto::Collection> for Collection {
}
}

#[derive(Clone, Debug)]
pub struct CollectionAndSegments {
pub collection: Collection,
pub metadata_segment: Segment,
pub record_segment: Segment,
pub vector_segment: Segment,
}

impl TryFrom<chroma_proto::ScanOperator> for CollectionAndSegments {
type Error = ConversionError;

fn try_from(value: chroma_proto::ScanOperator) -> Result<Self, ConversionError> {
Ok(Self {
collection: value
.collection
.ok_or(ConversionError::DecodeError)?
.try_into()
.map_err(|_| ConversionError::DecodeError)?,
metadata_segment: value
.metadata
.ok_or(ConversionError::DecodeError)?
.try_into()
.map_err(|_| ConversionError::DecodeError)?,
record_segment: value
.record
.ok_or(ConversionError::DecodeError)?
.try_into()
.map_err(|_| ConversionError::DecodeError)?,
vector_segment: value
.knn
.ok_or(ConversionError::DecodeError)?
.try_into()
.map_err(|_| ConversionError::DecodeError)?,
})
}
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion rust/types/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub enum ConversionError {
impl ChromaError for ConversionError {
fn code(&self) -> ErrorCodes {
match self {
ConversionError::DecodeError => ErrorCodes::Internal,
ConversionError::DecodeError => ErrorCodes::InvalidArgument,
}
}
}
Expand Down
137 changes: 0 additions & 137 deletions rust/worker/src/execution/operators/fetch_segment.rs

This file was deleted.

1 change: 0 additions & 1 deletion rust/worker/src/execution/operators/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ pub(super) mod write_segments;

// Required for benchmark
pub mod fetch_log;
pub mod fetch_segment;
pub mod filter;
pub mod knn;
pub mod knn_hnsw;
Expand Down
17 changes: 4 additions & 13 deletions rust/worker/src/execution/operators/prefetch_record.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use std::collections::HashSet;

use chroma_blockstore::provider::BlockfileProvider;
use chroma_error::{ChromaError, ErrorCodes};
use chroma_types::{Chunk, LogRecord, Segment};
use thiserror::Error;
use tonic::async_trait;
use tracing::{trace, Instrument, Span};
Expand All @@ -16,16 +14,15 @@ use crate::{
},
};

use super::projection::ProjectionInput;

/// The `PrefetchRecordOperator` prefetches the relevant records from the record segments to the cache
///
/// # Parameters
/// None
///
/// # Inputs
/// - `logs`: The latest logs of the collection
/// - `blockfile_provider`: The blockfile provider
/// - `record_segment`: The record segment information
/// - `offset_ids`: The offset ids of the records to prefetch
/// Identical to ProjectionInput
///
/// # Outputs
/// None
Expand All @@ -35,13 +32,7 @@ use crate::{
#[derive(Debug)]
pub struct PrefetchRecordOperator {}

#[derive(Debug)]
pub struct PrefetchRecordInput {
pub logs: Chunk<LogRecord>,
pub blockfile_provider: BlockfileProvider,
pub record_segment: Segment,
pub offset_ids: Vec<u32>,
}
pub type PrefetchRecordInput = ProjectionInput;

pub type PrefetchRecordOutput = ();

Expand Down
Loading

0 comments on commit c7c4dda

Please sign in to comment.