diff --git a/chromadb/db/mixins/sysdb.py b/chromadb/db/mixins/sysdb.py index 303d58488db..bd24df187e1 100644 --- a/chromadb/db/mixins/sysdb.py +++ b/chromadb/db/mixins/sysdb.py @@ -368,6 +368,7 @@ def get_segments( scope=scope, collection=collection, metadata=metadata, + file_paths={}, ) ) diff --git a/chromadb/proto/convert.py b/chromadb/proto/convert.py index 75f73769c8a..2f2a16cf20b 100644 --- a/chromadb/proto/convert.py +++ b/chromadb/proto/convert.py @@ -161,6 +161,7 @@ def from_proto_segment(segment: chroma_pb.Segment) -> Segment: metadata=from_proto_metadata(segment.metadata) if segment.HasField("metadata") else None, + file_paths={name: [path for path in paths.paths] for name, paths in segment.file_paths.items()} ) @@ -173,6 +174,7 @@ def to_proto_segment(segment: Segment) -> chroma_pb.Segment: metadata=None if segment["metadata"] is None else to_proto_update_metadata(segment["metadata"]), + file_paths={name: chroma_pb.FilePaths(paths=paths) for name, paths in segment["file_paths"].items()} ) diff --git a/chromadb/segment/impl/manager/distributed.py b/chromadb/segment/impl/manager/distributed.py index 0f92e0c0007..62ccb3114d3 100644 --- a/chromadb/segment/impl/manager/distributed.py +++ b/chromadb/segment/impl/manager/distributed.py @@ -55,6 +55,7 @@ def prepare_segments_for_new_collection( metadata=PersistentHnswParams.extract(collection.metadata) if collection.metadata else None, + file_paths={}, ) metadata_segment = Segment( id=uuid4(), @@ -62,6 +63,7 @@ def prepare_segments_for_new_collection( scope=SegmentScope.METADATA, collection=collection.id, metadata=None, + file_paths={}, ) record_segment = Segment( id=uuid4(), @@ -69,6 +71,7 @@ def prepare_segments_for_new_collection( scope=SegmentScope.RECORD, collection=collection.id, metadata=None, + file_paths={}, ) return [vector_segment, record_segment, metadata_segment] diff --git a/chromadb/segment/impl/manager/local.py b/chromadb/segment/impl/manager/local.py index 296ace7f9e7..06b152cc7f3 100644 --- a/chromadb/segment/impl/manager/local.py +++ b/chromadb/segment/impl/manager/local.py @@ -263,4 +263,5 @@ def _segment(type: SegmentType, scope: SegmentScope, collection: Collection) -> scope=scope, collection=collection.id, metadata=metadata, + file_paths={}, ) diff --git a/chromadb/test/db/test_system.py b/chromadb/test/db/test_system.py index af90bf876b8..f49a17226cb 100644 --- a/chromadb/test/db/test_system.py +++ b/chromadb/test/db/test_system.py @@ -174,6 +174,7 @@ def sample_segment(collection_id: uuid.UUID = uuid.uuid4(), scope=scope, collection=collection_id, metadata=metadata, + file_paths={}, ) # region Collection tests @@ -287,6 +288,7 @@ def test_update_collections(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=coll.id, metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3}, + file_paths={}, ) ], metadata=coll["metadata"], @@ -335,6 +337,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=collection.id, metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3}, + file_paths={}, ) ], metadata=collection["metadata"], @@ -355,6 +358,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=sample_collections[1].id, metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3}, + file_paths={}, ) ], # This could have been empty - []. metadata=collection["metadata"], @@ -377,6 +381,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=sample_collections[1].id, metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3}, + file_paths={}, ) ], get_or_create=True, @@ -396,6 +401,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=sample_collections[2].id, metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3}, + file_paths={}, ) ], get_or_create=False, @@ -417,6 +423,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=sample_collections[2].id, metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3}, + file_paths={}, ) ], metadata=collection["metadata"], @@ -439,6 +446,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=sample_collections[2].id, metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3}, + file_paths={}, ) ], get_or_create=True, @@ -765,6 +773,7 @@ def test_get_database_with_tenants(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=sample_collections[0].id, metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3}, + file_paths={}, ), Segment( id=uuid.UUID("11111111-d7d7-413b-92e1-731098a6e492"), @@ -772,6 +781,7 @@ def test_get_database_with_tenants(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=sample_collections[1].id, metadata={"test_str": "str2", "test_int": 2, "test_float": 2.3}, + file_paths={}, ), ] @@ -860,6 +870,7 @@ def test_update_segment(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, collection=sample_collections[0].id, metadata=metadata, + file_paths={}, ) sysdb.reset_state() diff --git a/chromadb/test/distributed/test_sanity.py b/chromadb/test/distributed/test_sanity.py index 34f04759623..8ffbd140ce4 100644 --- a/chromadb/test/distributed/test_sanity.py +++ b/chromadb/test/distributed/test_sanity.py @@ -5,11 +5,14 @@ import time from chromadb.api import ClientAPI from chromadb.test.conftest import ( - COMPACTION_SLEEP, reset, skip_if_not_cluster, ) from chromadb.test.property import invariants +from chromadb.test.utils.wait_for_version_increase import ( + wait_for_version_increase, + get_collection_version, +) import numpy as np @@ -78,7 +81,7 @@ def test_add_include_all_with_compaction_delay(client: ClientAPI) -> None: documents=[documents[-1]], ) - time.sleep(COMPACTION_SLEEP) # Wait for the documents to be compacted + wait_for_version_increase(client, collection.name, get_collection_version(client, collection.name), 120) random_query_1 = np.random.rand(1, 3)[0] random_query_2 = np.random.rand(1, 3)[0] diff --git a/chromadb/test/segment/distributed/test_protobuf_translation.py b/chromadb/test/segment/distributed/test_protobuf_translation.py index 6fd3777abec..7cfc8b26f3f 100644 --- a/chromadb/test/segment/distributed/test_protobuf_translation.py +++ b/chromadb/test/segment/distributed/test_protobuf_translation.py @@ -28,6 +28,7 @@ def unstarted_grpc_metadata_segment() -> GrpcMetadataSegment: metadata={ "grpc_url": "test", }, + file_paths={}, ) grpc_metadata_segment = GrpcMetadataSegment( system=system, diff --git a/chromadb/test/segment/test_metadata.py b/chromadb/test/segment/test_metadata.py index 50bab861800..bb0f40e4234 100644 --- a/chromadb/test/segment/test_metadata.py +++ b/chromadb/test/segment/test_metadata.py @@ -129,6 +129,7 @@ def _build_document(i: int) -> str: scope=SegmentScope.METADATA, collection=uuid.UUID(int=0), metadata=None, + file_paths={}, ) segment_definition2 = Segment( @@ -137,6 +138,7 @@ def _build_document(i: int) -> str: scope=SegmentScope.METADATA, collection=uuid.UUID(int=1), metadata=None, + file_paths={}, ) diff --git a/chromadb/test/segment/test_vector.py b/chromadb/test/segment/test_vector.py index 0d62c827461..87375940276 100644 --- a/chromadb/test/segment/test_vector.py +++ b/chromadb/test/segment/test_vector.py @@ -115,6 +115,7 @@ def create_random_segment_definition() -> Segment: scope=SegmentScope.VECTOR, collection=uuid.UUID(int=0), metadata=test_hnsw_config, + file_paths={}, ) diff --git a/chromadb/types.py b/chromadb/types.py index 7bce5dac332..bffa95878de 100644 --- a/chromadb/types.py +++ b/chromadb/types.py @@ -176,6 +176,7 @@ class Segment(TypedDict): scope: SegmentScope collection: UUID metadata: Optional[Metadata] + file_paths: Mapping[str, Sequence[str]] class CollectionSegments(TypedDict): collection: Collection