Skip to content

Commit

Permalink
Add file paths to segment
Browse files (browse the repository at this point in the history)
  • Loading branch information
Sicheng Pan committed Dec 6, 2024
1 parent 6107539 commit 513c387
Show file tree
Hide file tree
Showing 10 changed files with 28 additions and 2 deletions.
1 change: 1 addition & 0 deletions chromadb/db/mixins/sysdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ def get_segments(
scope=scope,
collection=collection,
metadata=metadata,
file_paths={},
)
)

Expand Down
2 changes: 2 additions & 0 deletions chromadb/proto/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def from_proto_segment(segment: chroma_pb.Segment) -> Segment:
metadata=from_proto_metadata(segment.metadata)
if segment.HasField("metadata")
else None,
file_paths={name: [path for path in paths.paths] for name, paths in segment.file_paths.items()}
)


Expand All @@ -173,6 +174,7 @@ def to_proto_segment(segment: Segment) -> chroma_pb.Segment:
metadata=None
if segment["metadata"] is None
else to_proto_update_metadata(segment["metadata"]),
file_paths={name: chroma_pb.FilePaths(paths=paths) for name, paths in segment["file_paths"].items()}
)


Expand Down
3 changes: 3 additions & 0 deletions chromadb/segment/impl/manager/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,20 +55,23 @@ def prepare_segments_for_new_collection(
metadata=PersistentHnswParams.extract(collection.metadata)
if collection.metadata
else None,
file_paths={},
)
metadata_segment = Segment(
id=uuid4(),
type=SegmentType.BLOCKFILE_METADATA.value,
scope=SegmentScope.METADATA,
collection=collection.id,
metadata=None,
file_paths={},
)
record_segment = Segment(
id=uuid4(),
type=SegmentType.BLOCKFILE_RECORD.value,
scope=SegmentScope.RECORD,
collection=collection.id,
metadata=None,
file_paths={},
)
return [vector_segment, record_segment, metadata_segment]

Expand Down
1 change: 1 addition & 0 deletions chromadb/segment/impl/manager/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,4 +263,5 @@ def _segment(type: SegmentType, scope: SegmentScope, collection: Collection) ->
scope=scope,
collection=collection.id,
metadata=metadata,
file_paths={},
)
11 changes: 11 additions & 0 deletions chromadb/test/db/test_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def sample_segment(collection_id: uuid.UUID = uuid.uuid4(),
scope=scope,
collection=collection_id,
metadata=metadata,
file_paths={},
)

# region Collection tests
Expand Down Expand Up @@ -287,6 +288,7 @@ def test_update_collections(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=coll.id,
metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3},
file_paths={},
)
],
metadata=coll["metadata"],
Expand Down Expand Up @@ -335,6 +337,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=collection.id,
metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3},
file_paths={},
)
],
metadata=collection["metadata"],
Expand All @@ -355,6 +358,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=sample_collections[1].id,
metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3},
file_paths={},
)
], # This could have been empty - [].
metadata=collection["metadata"],
Expand All @@ -377,6 +381,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=sample_collections[1].id,
metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3},
file_paths={},
)
],
get_or_create=True,
Expand All @@ -396,6 +401,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=sample_collections[2].id,
metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3},
file_paths={},
)
],
get_or_create=False,
Expand All @@ -417,6 +423,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=sample_collections[2].id,
metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3},
file_paths={},
)
],
metadata=collection["metadata"],
Expand All @@ -439,6 +446,7 @@ def test_get_or_create_collection(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=sample_collections[2].id,
metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3},
file_paths={},
)
],
get_or_create=True,
Expand Down Expand Up @@ -765,13 +773,15 @@ def test_get_database_with_tenants(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=sample_collections[0].id,
metadata={"test_str": "str1", "test_int": 1, "test_float": 1.3},
file_paths={},
),
Segment(
id=uuid.UUID("11111111-d7d7-413b-92e1-731098a6e492"),
type="test_type_b",
scope=SegmentScope.VECTOR,
collection=sample_collections[1].id,
metadata={"test_str": "str2", "test_int": 2, "test_float": 2.3},
file_paths={},
),
]

Expand Down Expand Up @@ -860,6 +870,7 @@ def test_update_segment(sysdb: SysDB) -> None:
scope=SegmentScope.VECTOR,
collection=sample_collections[0].id,
metadata=metadata,
file_paths={},
)

sysdb.reset_state()
Expand Down
7 changes: 5 additions & 2 deletions chromadb/test/distributed/test_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
import time
from chromadb.api import ClientAPI
from chromadb.test.conftest import (
COMPACTION_SLEEP,
reset,
skip_if_not_cluster,
)
from chromadb.test.property import invariants
from chromadb.test.utils.wait_for_version_increase import (
wait_for_version_increase,
get_collection_version,
)
import numpy as np


Expand Down Expand Up @@ -78,7 +81,7 @@ def test_add_include_all_with_compaction_delay(client: ClientAPI) -> None:
documents=[documents[-1]],
)

time.sleep(COMPACTION_SLEEP) # Wait for the documents to be compacted
wait_for_version_increase(client, collection.name, get_collection_version(client, collection.name), 120)

random_query_1 = np.random.rand(1, 3)[0]
random_query_2 = np.random.rand(1, 3)[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def unstarted_grpc_metadata_segment() -> GrpcMetadataSegment:
metadata={
"grpc_url": "test",
},
file_paths={},
)
grpc_metadata_segment = GrpcMetadataSegment(
system=system,
Expand Down
2 changes: 2 additions & 0 deletions chromadb/test/segment/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def _build_document(i: int) -> str:
scope=SegmentScope.METADATA,
collection=uuid.UUID(int=0),
metadata=None,
file_paths={},
)

segment_definition2 = Segment(
Expand All @@ -137,6 +138,7 @@ def _build_document(i: int) -> str:
scope=SegmentScope.METADATA,
collection=uuid.UUID(int=1),
metadata=None,
file_paths={},
)


Expand Down
1 change: 1 addition & 0 deletions chromadb/test/segment/test_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def create_random_segment_definition() -> Segment:
scope=SegmentScope.VECTOR,
collection=uuid.UUID(int=0),
metadata=test_hnsw_config,
file_paths={},
)


Expand Down
1 change: 1 addition & 0 deletions chromadb/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ class Segment(TypedDict):
scope: SegmentScope
collection: UUID
metadata: Optional[Metadata]
file_paths: Mapping[str, Sequence[str]]

class CollectionSegments(TypedDict):
collection: Collection
Expand Down

0 comments on commit 513c387

Please sign in to comment.