Skip to content

Commit

Permalink
[STACKED chroma-core#1255] [ENH] Add multitenancy (chroma-core#1244)
Browse files Browse the repository at this point in the history
## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
	 - ...
 - New functionality
- Adds multitenancy and databases as first-class concepts by migrating
the db and plumbing them through the API into the sysdb.
- We now treat the "System" as a singleton-per-path and create a wrapper
API object that proxies to it, carrying the tenant/database context. This
way, the server does not need to be aware of the connection's context.

## Test plan
*How are these changes tested?*
Unit Tests were added for new client tenant/database behavior
Property tests were added for the new tenant/database behavior by
subclassing the collection state machine and switching the
tenant/database as a state machine transition.

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js

## Documentation Changes
I will add a section to the docs about multitenancy and how to use it.
We can remove warnings about the client being a singleton.
  • Loading branch information
HammadB authored Oct 24, 2023
1 parent e95de3d commit 0552704
Show file tree
Hide file tree
Showing 43 changed files with 2,380 additions and 240 deletions.
68 changes: 48 additions & 20 deletions chromadb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Dict
import logging
from chromadb.api.client import Client as ClientCreator
from chromadb.api.client import AdminClient as AdminClientCreator
import chromadb.config
from chromadb.config import Settings, System
from chromadb.api import API
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Settings
from chromadb.api import AdminAPI, ClientAPI
from chromadb.api.models.Collection import Collection
from chromadb.api.types import (
CollectionMetadata,
Expand Down Expand Up @@ -35,9 +37,6 @@
"QueryResult",
"GetResult",
]
from chromadb.telemetry.product.events import ClientStartEvent
from chromadb.telemetry.product import ProductTelemetryClient


logger = logging.getLogger(__name__)

Expand All @@ -55,7 +54,7 @@

is_client = False
try:
from chromadb.is_thin_client import is_thin_client # type: ignore
from chromadb.is_thin_client import is_thin_client

is_client = is_thin_client
except ImportError:
Expand Down Expand Up @@ -95,28 +94,43 @@ def get_settings() -> Settings:
return __settings


def EphemeralClient(settings: Settings = Settings()) -> API:
def EphemeralClient(
settings: Settings = Settings(),
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> ClientAPI:
"""
Creates an in-memory instance of Chroma. This is useful for testing and
development, but not recommended for production use.
Args:
settings: The settings used to create the client. `is_persistent` is set to
False on this object before the client is created.
tenant: The tenant to use for this client. Defaults to the default tenant.
database: The database to use for this client. Defaults to the default database.
"""
# NOTE(review): this assignment mutates the Settings object that was passed in.
# The default `Settings()` is evaluated once, when the function is defined, so
# calls that rely on the default all share (and mutate) the same instance —
# confirm this is intended.
settings.is_persistent = False

return Client(settings)
return ClientCreator(settings=settings, tenant=tenant, database=database)


def PersistentClient(path: str = "./chroma", settings: Settings = Settings()) -> API:
def PersistentClient(
path: str = "./chroma",
settings: Settings = Settings(),
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> ClientAPI:
"""
Creates a persistent instance of Chroma that saves to disk. This is useful for
testing and development, but not recommended for production use.
Args:
path: The directory to save Chroma's data to. Defaults to "./chroma".
settings: The settings used to create the client. `persist_directory` is set to
`path` and `is_persistent` is set to True on this object before the client
is created.
tenant: The tenant to use for this client. Defaults to the default tenant.
database: The database to use for this client. Defaults to the default database.
"""
# NOTE(review): these assignments mutate the Settings object that was passed in.
# The default `Settings()` is evaluated once, when the function is defined, so
# calls that rely on the default all share (and mutate) the same instance —
# confirm this is intended.
settings.persist_directory = path
settings.is_persistent = True

return Client(settings)
return ClientCreator(tenant=tenant, database=database, settings=settings)


def HttpClient(
Expand All @@ -125,7 +139,9 @@ def HttpClient(
ssl: bool = False,
headers: Dict[str, str] = {},
settings: Settings = Settings(),
) -> API:
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> ClientAPI:
"""
Creates a client that connects to a remote Chroma server. This supports
many clients connecting to the same server, and is the recommended way to
Expand All @@ -136,6 +152,8 @@ def HttpClient(
port: The port of the Chroma server. Defaults to "8000".
ssl: Whether to use SSL to connect to the Chroma server. Defaults to False.
headers: A dictionary of headers to send to the Chroma server. Defaults to {}.
tenant: The tenant to use for this client. Defaults to the default tenant.
database: The database to use for this client. Defaults to the default database.
"""

settings.chroma_api_impl = "chromadb.api.fastapi.FastAPI"
Expand All @@ -144,19 +162,29 @@ def HttpClient(
settings.chroma_server_ssl_enabled = ssl
settings.chroma_server_headers = headers

return Client(settings)
return ClientCreator(tenant=tenant, database=database, settings=settings)


def Client(settings: Settings = __settings) -> API:
"""Return a running chroma.API instance"""
def Client(
settings: Settings = __settings,
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> ClientAPI:
"""
Return a running chroma.API instance
system = System(settings)
tenant: The tenant to use for this client. Defaults to the default tenant.
database: The database to use for this client. Defaults to the default database.
product_telemetry_client = system.instance(ProductTelemetryClient)
api = system.instance(API)
"""

return ClientCreator(tenant=tenant, database=database, settings=settings)

system.start()

product_telemetry_client.capture(ClientStartEvent())
def AdminClient(settings: Settings = Settings()) -> AdminAPI:
"""
return api
Creates an admin client that can be used to create tenants and databases.
"""
return AdminClientCreator(settings=settings)
145 changes: 142 additions & 3 deletions chromadb/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from abc import ABC, abstractmethod
from typing import Sequence, Optional
from uuid import UUID

from overrides import override
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
from chromadb.api.models.Collection import Collection
from chromadb.api.types import (
CollectionMetadata,
Expand All @@ -16,10 +19,11 @@
WhereDocument,
)
from chromadb.config import Component, Settings
from chromadb.types import Database, Tenant
import chromadb.utils.embedding_functions as ef


class API(Component, ABC):
class BaseAPI(ABC):
@abstractmethod
def heartbeat(self) -> int:
"""Get the current time in nanoseconds since epoch.
Expand Down Expand Up @@ -371,10 +375,10 @@ def get_version(self) -> str:

@abstractmethod
def get_settings(self) -> Settings:
"""Get the settings used to initialize the client.
"""Get the settings used to initialize.
Returns:
Settings: The settings used to initialize the client.
Settings: The settings used to initialize.
"""
pass
Expand All @@ -385,3 +389,138 @@ def max_batch_size(self) -> int:
"""Return the maximum number of records that can be submitted in a single call
to submit_embeddings."""
pass


class ClientAPI(BaseAPI, ABC):
    """Abstract client-side API: the BaseAPI operations plus methods for
    changing the tenant and database associated with the client."""

    # Name of the tenant associated with this client (see set_tenant).
    tenant: str
    # Name of the database associated with this client
    # (see set_tenant / set_database).
    database: str

    @abstractmethod
    def set_tenant(self, tenant: str, database: str = DEFAULT_DATABASE) -> None:
        """Set the tenant and database for the client. Raises an error if the tenant or
        database does not exist.

        Args:
            tenant: The tenant to set.
            database: The database to set. Defaults to DEFAULT_DATABASE.
        """
        pass

    @abstractmethod
    def set_database(self, database: str) -> None:
        """Set the database for the client. Raises an error if the database does not exist.

        Args:
            database: The database to set.
        """
        pass

    @staticmethod
    @abstractmethod
    def clear_system_cache() -> None:
        """Clear the system cache so that new systems can be created for an existing path.
        This should only be used for testing purposes."""
        pass


class AdminAPI(ABC):
    """Abstract administrative API for creating and looking up tenants and
    databases."""

    @abstractmethod
    def create_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None:
        """Create a new database. Raises an error if the database already exists.

        Args:
            name: The name of the database to create.
            tenant: The tenant of the database to create. Defaults to DEFAULT_TENANT.
        """
        pass

    @abstractmethod
    def get_database(self, name: str, tenant: str = DEFAULT_TENANT) -> Database:
        """Get a database. Raises an error if the database does not exist.

        Args:
            name: The name of the database to get.
            tenant: The tenant of the database to get. Defaults to DEFAULT_TENANT.
        """
        pass

    @abstractmethod
    def create_tenant(self, name: str) -> None:
        """Create a new tenant. Raises an error if the tenant already exists.

        Args:
            name: The name of the tenant to create.
        """
        pass

    @abstractmethod
    def get_tenant(self, name: str) -> Tenant:
        """Get a tenant. Raises an error if the tenant does not exist.

        Args:
            name: The name of the tenant to get.
        """
        pass


class ServerAPI(BaseAPI, AdminAPI, Component):
    """An API instance that extends the relevant Base API methods by passing
    in a tenant and database. This is the root component of the Chroma System"""

    # NOTE(review): several methods below use `ef.DefaultEmbeddingFunction()` as a
    # default argument. Python evaluates default values once, when the function is
    # defined, so every call that relies on the default receives the same
    # instance — confirm that sharing it is intended.

    @abstractmethod
    @override
    def list_collections(
        self, tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE
    ) -> Sequence[Collection]:
        """Override of the inherited `list_collections` that also accepts a
        `tenant` and `database` (defaulting to DEFAULT_TENANT / DEFAULT_DATABASE)."""
        pass

    @abstractmethod
    @override
    def create_collection(
        self,
        name: str,
        metadata: Optional[CollectionMetadata] = None,
        embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(),
        get_or_create: bool = False,
        tenant: str = DEFAULT_TENANT,
        database: str = DEFAULT_DATABASE,
    ) -> Collection:
        """Override of the inherited `create_collection` that also accepts a
        `tenant` and `database` (defaulting to DEFAULT_TENANT / DEFAULT_DATABASE)."""
        pass

    @abstractmethod
    @override
    def get_collection(
        self,
        name: str,
        embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(),
        tenant: str = DEFAULT_TENANT,
        database: str = DEFAULT_DATABASE,
    ) -> Collection:
        """Override of the inherited `get_collection` that also accepts a
        `tenant` and `database` (defaulting to DEFAULT_TENANT / DEFAULT_DATABASE)."""
        pass

    @abstractmethod
    @override
    def get_or_create_collection(
        self,
        name: str,
        metadata: Optional[CollectionMetadata] = None,
        embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(),
        tenant: str = DEFAULT_TENANT,
        database: str = DEFAULT_DATABASE,
    ) -> Collection:
        """Override of the inherited `get_or_create_collection` that also accepts a
        `tenant` and `database` (defaulting to DEFAULT_TENANT / DEFAULT_DATABASE)."""
        pass

    @abstractmethod
    @override
    def delete_collection(
        self,
        name: str,
        tenant: str = DEFAULT_TENANT,
        database: str = DEFAULT_DATABASE,
    ) -> None:
        """Override of the inherited `delete_collection` that also accepts a
        `tenant` and `database` (defaulting to DEFAULT_TENANT / DEFAULT_DATABASE)."""
        pass
Loading

0 comments on commit 0552704

Please sign in to comment.