From 71178d2dac1236d69ba0f98c7ad3e868b0e39a51 Mon Sep 17 00:00:00 2001 From: Itai Smith Date: Sun, 1 Dec 2024 12:19:56 -0800 Subject: [PATCH] Modify list_collections client methods to return a list of collection names --- chromadb/api/__init__.py | 11 ++++----- chromadb/api/async_api.py | 8 +++---- chromadb/api/async_client.py | 14 ++++------- chromadb/api/client.py | 8 +++---- chromadb/api/models/Collection.py | 23 +++++++++++++++++++ chromadb/cli/cli.py | 3 ++- chromadb/test/client/test_database_tenant.py | 20 ++++++++++------ .../test_multiple_clients_concurrency.py | 3 ++- chromadb/test/property/test_collections.py | 2 +- chromadb/test/test_api.py | 3 ++- .../pages/reference/py-client.md | 4 ++-- 11 files changed, 63 insertions(+), 36 deletions(-) diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index d443f9ab0051..057e143d4c35 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Sequence, Optional +from typing import Sequence, Optional, List from uuid import UUID from overrides import override @@ -31,8 +31,7 @@ from chromadb.config import Component, Settings from chromadb.types import Database, Tenant, Collection as CollectionModel import chromadb.utils.embedding_functions as ef -from chromadb.api.models.Collection import Collection - +from chromadb.api.models.Collection import Collection, CollectionName # Re-export the async version from chromadb.api.async_api import ( # noqa: F401 @@ -347,19 +346,19 @@ def list_collections( self, limit: Optional[int] = None, offset: Optional[int] = None, - ) -> Sequence[Collection]: + ) -> List[CollectionName]: """List all collections. Args: limit: The maximum number of entries to return. Defaults to None. offset: The number of entries to skip before returning. Defaults to None. 
Returns: - Sequence[Collection]: A list of collections + List[CollectionName]: A list of collection names Examples: ```python client.list_collections() - # [collection(name="my_collection", metadata={})] + # ["my_collection"] ``` """ pass diff --git a/chromadb/api/async_api.py b/chromadb/api/async_api.py index 8396d1e9a97a..529e10e9761a 100644 --- a/chromadb/api/async_api.py +++ b/chromadb/api/async_api.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Sequence, Optional +from typing import Sequence, Optional, List from uuid import UUID from overrides import override @@ -338,19 +338,19 @@ async def list_collections( self, limit: Optional[int] = None, offset: Optional[int] = None, - ) -> Sequence[AsyncCollection]: + ) -> List[str]: """List all collections. Args: limit: The maximum number of entries to return. Defaults to None. offset: The number of entries to skip before returning. Defaults to None. Returns: - Sequence[Collection]: A list of collections + List[str]: A list of collection names. 
Examples: ```python await client.list_collections() - # [collection(name="my_collection", metadata={})] + # ["my_collection"] ``` """ pass diff --git a/chromadb/api/async_client.py b/chromadb/api/async_client.py index 56491e5ca819..b45384ae127d 100644 --- a/chromadb/api/async_client.py +++ b/chromadb/api/async_client.py @@ -1,7 +1,9 @@ import httpx -from typing import Optional, Sequence +from typing import Optional, Sequence, List from uuid import UUID from overrides import override + +from chromadb.api.models.Collection import CollectionName from chromadb.auth import UserIdentity from chromadb.auth.utils import maybe_set_tenant_and_database from chromadb.api import AsyncAdminAPI, AsyncClientAPI, AsyncServerAPI @@ -152,17 +154,11 @@ async def heartbeat(self) -> int: @override async def list_collections( self, limit: Optional[int] = None, offset: Optional[int] = None - ) -> Sequence[AsyncCollection]: + ) -> List[CollectionName]: models = await self._server.list_collections( limit, offset, tenant=self.tenant, database=self.database ) - return [ - AsyncCollection( - client=self._server, - model=model, - ) - for model in models - ] + return [CollectionName(model.name) for model in models] @override async def count_collections(self) -> int: diff --git a/chromadb/api/client.py b/chromadb/api/client.py index de9ca1e7115b..e612437f19d7 100644 --- a/chromadb/api/client.py +++ b/chromadb/api/client.py @@ -1,4 +1,4 @@ -from typing import Optional, Sequence +from typing import Optional, Sequence, List from uuid import UUID from overrides import override @@ -25,7 +25,7 @@ from chromadb.auth.utils import maybe_set_tenant_and_database from chromadb.config import Settings, System from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE -from chromadb.api.models.Collection import Collection +from chromadb.api.models.Collection import Collection, CollectionName from chromadb.errors import ChromaError from chromadb.types import Database, Tenant, Where, WhereDocument import 
class CollectionName(str):
    """String subclass returned by ``list_collections`` in place of a Collection.

    An instance behaves exactly like the collection's name as a plain ``str``.
    Its only extra behavior is a migration aid: accessing an attribute that
    used to exist on ``Collection`` objects raises ``NotImplementedError`` with
    a message pointing the caller at ``get_collection``.
    """

    # Attributes and methods of ``Collection`` that pre-0.6.0 callers may still
    # try to use on an element of the list returned by ``list_collections``.
    # NOTE: ``count`` is also a ``str`` method, so normal attribute lookup
    # resolves it to ``str.count`` and ``__getattr__`` is never consulted for it.
    _COLLECTION_ATTRIBUTES = frozenset(
        {
            "add",
            "configuration_json",
            "count",
            "database",
            "delete",
            "get",
            "get_model",
            "id",
            "metadata",
            "modify",
            "name",
            "peek",
            "query",
            "tenant",
            "update",
            "upsert",
        }
    )

    def __getattr__(self, name: str):
        """Handle lookups for attributes that ``str`` does not define.

        Python only calls ``__getattr__`` after regular attribute lookup has
        failed, so ordinary ``str`` methods are unaffected.

        Args:
            name: The attribute name that was not found on the string.

        Raises:
            NotImplementedError: If ``name`` is a known ``Collection``
                attribute or method; the message explains the migration.
            AttributeError: For every other name, matching the behavior of a
                plain ``str``.
        """
        if name in CollectionName._COLLECTION_ATTRIBUTES:
            raise NotImplementedError(
                "In Chroma v0.6.0, list_collections only returns collection names. "
                f"Use get_collection to access Collection.{name}. "
                "See https://docs.trychroma.com/deployment/migration for more information."
            )

        # Falling through without raising would make every unknown attribute
        # silently evaluate to None, so hasattr() would always be True and
        # getattr(obj, name, default) would never return its default.
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {name!r}"
        )
client.get_collection(client.list_collections()[0]) + collection.modify(metadata={"database": "test_db2"}) # Validate that the metadata was updated client.set_tenant(tenant=DEFAULT_TENANT, database=DEFAULT_DATABASE) collections = client.list_collections() assert len(collections) == 1 - assert collections[0].metadata == {"database": "default2"} + collection = client.get_collection(collections[0]) + assert collection.metadata == {"database": "default2"} client.set_tenant(tenant=DEFAULT_TENANT, database="test_db") collections = client.list_collections() assert len(collections) == 1 - assert collections[0].metadata == {"database": "test_db2"} + collection = client.get_collection(collections[0]) + assert collection.metadata == {"database": "test_db2"} # Delete the collections and make sure databases are isolated client.set_tenant(tenant=DEFAULT_TENANT, database=DEFAULT_DATABASE) diff --git a/chromadb/test/client/test_multiple_clients_concurrency.py b/chromadb/test/client/test_multiple_clients_concurrency.py index a9e855fb4354..458fd3c23534 100644 --- a/chromadb/test/client/test_multiple_clients_concurrency.py +++ b/chromadb/test/client/test_multiple_clients_concurrency.py @@ -44,6 +44,7 @@ def run_target(n: int) -> None: client.set_database(database) seen_collections = client.list_collections() assert len(seen_collections) == COLLECTION_COUNT - for collection in seen_collections: + for collection_name in seen_collections: + collection = client.get_collection(collection_name) assert collection.name in collections assert collection.metadata == {"database": database} diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 8dd10837ce16..4b3eaecba09d 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -89,7 +89,7 @@ def list_collections(self) -> None: colls = self.client.list_collections() assert len(colls) == len(self.model) for c in colls: - assert c.name in 
self.model + assert c in self.model # @rule for list_collections with limit and offset @rule( diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index ab91408c992a..0be44498e32a 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -485,7 +485,8 @@ def test_metadata_cru(client): # Test list collections collections = client.list_collections() - for collection in collections: + for collection_name in collections: + collection = client.get_collection(collection_name) if collection.name == "testspace": assert collection.metadata is not None assert collection.metadata["a"] == 2 diff --git a/docs/docs.trychroma.com/pages/reference/py-client.md b/docs/docs.trychroma.com/pages/reference/py-client.md index 2ab4bac6318c..e9be33571f60 100644 --- a/docs/docs.trychroma.com/pages/reference/py-client.md +++ b/docs/docs.trychroma.com/pages/reference/py-client.md @@ -257,10 +257,10 @@ class ClientAPI(BaseAPI, ABC) ```python def list_collections(limit: Optional[int] = None, - offset: Optional[int] = None) -> Sequence[Collection] + offset: Optional[int] = None) -> List[str] ``` -List all collections. +List all collection names. **Arguments**: