Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[STACKED #1255] [ENH] Add multitenancy #1244

Merged
merged 23 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/chroma-client-integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ on:
- main
- '**'
workflow_dispatch:

jobs:
test:
timeout-minutes: 90
strategy:
matrix:
python: ['3.7', '3.8', '3.9', '3.10']
python: ['3.8', '3.9', '3.10', '3.11']
platform: [ubuntu-latest, windows-latest]
runs-on: ${{ matrix.platform }}
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/chroma-cluster-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
test:
strategy:
matrix:
python: ['3.7']
python: ['3.8']
platform: [ubuntu-latest]
testfile: ["chromadb/test/ingest/test_producer_consumer.py",
"chromadb/test/segment/distributed/test_memberlist_provider.py",]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/chroma-integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
test:
strategy:
matrix:
python: ['3.7']
python: ['3.8']
platform: [ubuntu-latest, windows-latest]
testfile: ["--ignore-glob 'chromadb/test/property/*' --ignore='chromadb/test/test_cli.py'",
"chromadb/test/property/test_add.py",
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/chroma-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
timeout-minutes: 90
strategy:
matrix:
python: ['3.7', '3.8', '3.9', '3.10']
python: ['3.8', '3.9', '3.10', '3.11']
platform: [ubuntu-latest, windows-latest]
testfile: ["--ignore-glob 'chromadb/test/property/*' --ignore-glob 'chromadb/test/stress/*'",
"chromadb/test/property/test_add.py",
Expand Down Expand Up @@ -44,7 +44,7 @@ jobs:
timeout-minutes: 90
strategy:
matrix:
python: ['3.7']
python: ['3.8']
platform: ['16core-64gb-ubuntu-latest', '16core-64gb-windows-latest']
testfile: ["'chromadb/test/stress/'"]
runs-on: ${{ matrix.platform }}
Expand Down
55 changes: 33 additions & 22 deletions chromadb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Dict
import logging
from chromadb.api.client import Client as ClientCreator
from chromadb.api.client import AdminClient as AdminClientCreator
import chromadb.config
from chromadb.config import Settings, System
from chromadb.api import API
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Settings
from chromadb.api import AdminAPI, ClientAPI
from chromadb.api.models.Collection import Collection
from chromadb.api.types import (
CollectionMetadata,
Expand Down Expand Up @@ -35,8 +37,6 @@
"QueryResult",
"GetResult",
]
from chromadb.telemetry.events import ClientStartEvent
from chromadb.telemetry import Telemetry


logger = logging.getLogger(__name__)
Expand All @@ -55,13 +55,15 @@

is_client = False
try:
from chromadb.is_thin_client import is_thin_client # type: ignore
from chromadb.is_thin_client import is_thin_client

is_client = is_thin_client
except ImportError:
is_client = False

if not is_client:
import sqlite3

if sqlite3.sqlite_version_info < (3, 35, 0):
if IN_COLAB:
# In Colab, hotswap to pysqlite-binary if it's too old
Expand Down Expand Up @@ -90,17 +92,26 @@ def get_settings() -> Settings:
return __settings


def EphemeralClient(settings: Settings = Settings()) -> API:
def EphemeralClient(
settings: Settings = Settings(),
tenant: str = DEFAULT_TENANT,
HammadB marked this conversation as resolved.
Show resolved Hide resolved
database: str = DEFAULT_DATABASE,
) -> ClientAPI:
"""
Creates an in-memory instance of Chroma. This is useful for testing and
development, but not recommended for production use.
"""
settings.is_persistent = False

return Client(settings)
return ClientCreator(settings=settings, tenant=tenant, database=database)


def PersistentClient(path: str = "./chroma", settings: Settings = Settings()) -> API:
def PersistentClient(
HammadB marked this conversation as resolved.
Show resolved Hide resolved
path: str = "./chroma",
settings: Settings = Settings(),
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> ClientAPI:
"""
Creates a persistent instance of Chroma that saves to disk. This is useful for
testing and development, but not recommended for production use.
Expand All @@ -111,7 +122,7 @@ def PersistentClient(path: str = "./chroma", settings: Settings = Settings()) ->
settings.persist_directory = path
settings.is_persistent = True

return Client(settings)
return ClientCreator(tenant=tenant, database=database, settings=settings)
HammadB marked this conversation as resolved.
Show resolved Hide resolved


def HttpClient(
Expand All @@ -120,7 +131,9 @@ def HttpClient(
ssl: bool = False,
headers: Dict[str, str] = {},
settings: Settings = Settings(),
) -> API:
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> ClientAPI:
"""
Creates a client that connects to a remote Chroma server. This supports
many clients connecting to the same server, and is the recommended way to
Expand All @@ -139,20 +152,18 @@ def HttpClient(
settings.chroma_server_ssl_enabled = ssl
settings.chroma_server_headers = headers

return Client(settings)
return ClientCreator(tenant=tenant, database=database, settings=settings)


def Client(settings: Settings = __settings) -> API:
"""Return a running chroma.API instance"""

system = System(settings)
def AdminClient(settings: Settings = Settings()) -> AdminAPI:
    """Build and return an admin client configured with ``settings``.

    Args:
        settings: The settings forwarded to the admin client constructor.

    Returns:
        AdminAPI: The admin client created from ``settings``.

    """
    # NOTE(review): the default ``Settings()`` is evaluated once, when this
    # function is defined, so every call that omits ``settings`` receives the
    # same instance.
    admin_client = AdminClientCreator(settings=settings)
    return admin_client

telemetry_client = system.instance(Telemetry)
api = system.instance(API)

system.start()

# Submit event for client start
telemetry_client.capture(ClientStartEvent())
def Client(
settings: Settings = __settings,
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> ClientAPI:
"""Return a running chroma.API instance"""

return api
return ClientCreator(tenant=tenant, database=database, settings=settings)
155 changes: 152 additions & 3 deletions chromadb/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from abc import ABC, abstractmethod
from typing import Sequence, Optional
from uuid import UUID

from overrides import override
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
from chromadb.api.models.Collection import Collection
from chromadb.api.types import (
CollectionMetadata,
Expand All @@ -16,10 +19,11 @@
WhereDocument,
)
from chromadb.config import Component, Settings
from chromadb.types import Database, Tenant
import chromadb.utils.embedding_functions as ef


class API(Component, ABC):
class BaseAPI(ABC):
HammadB marked this conversation as resolved.
Show resolved Hide resolved
@abstractmethod
def heartbeat(self) -> int:
"""Get the current time in nanoseconds since epoch.
Expand Down Expand Up @@ -371,10 +375,10 @@ def get_version(self) -> str:

@abstractmethod
def get_settings(self) -> Settings:
"""Get the settings used to initialize the client.
"""Get the settings used to initialize.

Returns:
Settings: The settings used to initialize the client.
Settings: The settings used to initialize.

"""
pass
Expand All @@ -385,3 +389,148 @@ def max_batch_size(self) -> int:
"""Return the maximum number of records that can be submitted in a single call
to submit_embeddings."""
pass


class ClientAPI(BaseAPI, ABC):
tenant: str
database: str

@abstractmethod
def set_tenant(self, tenant: str, database: str = DEFAULT_DATABASE) -> None:
HammadB marked this conversation as resolved.
Show resolved Hide resolved
"""Set the tenant and database for the client. Raises an error if the tenant or
database does not exist.

Args:
tenant: The tenant to set.
database: The database to set.

"""
pass

@abstractmethod
def set_database(self, database: str) -> None:
"""Set the database for the client. Raises an error if the database does not exist.

Args:
database: The database to set.

"""
pass

@staticmethod
@abstractmethod
def clear_system_cache() -> None:
"""Clear the system cache so that new systems can be created for an existing path.
This should only be used for testing purposes."""
pass


class AdminAPI(ABC):
    """Abstract interface for tenant and database administration.

    Declares the operations for creating and looking up tenants and the
    databases that belong to them. Every method is abstract; concrete
    subclasses supply the behavior.
    """

    @abstractmethod
    def create_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None:
        """Create a new database. Raises an error if the database already exists.

        Args:
            name: The name of the database to create.
            tenant: The tenant to create the database in. Defaults to
                DEFAULT_TENANT.

        """
        pass

    @abstractmethod
    def get_database(self, name: str, tenant: str = DEFAULT_TENANT) -> Database:
        """Get a database. Raises an error if the database does not exist.

        Args:
            name: The name of the database to get.
            tenant: The tenant of the database to get. Defaults to
                DEFAULT_TENANT.

        Returns:
            Database: The requested database.

        """
        pass

    @abstractmethod
    def create_tenant(self, name: str) -> None:
        """Create a new tenant. Raises an error if the tenant already exists.

        Args:
            name: The name of the tenant to create.

        """
        pass

    @abstractmethod
    def get_tenant(self, name: str) -> Tenant:
        """Get a tenant. Raises an error if the tenant does not exist.

        Args:
            name: The name of the tenant to get.

        Returns:
            Tenant: The requested tenant.

        """
        pass


class ServerAPI(BaseAPI, AdminAPI, Component):
HammadB marked this conversation as resolved.
Show resolved Hide resolved
"""An API instance that extends the relevant Base API methods by passing
in a tenant and database. This is the root component of the Chroma System"""

@abstractmethod
@override
def list_collections(
self, tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE
) -> Sequence[Collection]:
pass

@abstractmethod
@override
def create_collection(
self,
name: str,
metadata: Optional[CollectionMetadata] = None,
embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(),
get_or_create: bool = False,
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> Collection:
pass

@abstractmethod
@override
def get_collection(
self,
name: str,
embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(),
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> Collection:
pass

@abstractmethod
@override
def get_or_create_collection(
self,
name: str,
metadata: Optional[CollectionMetadata] = None,
embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(),
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> Collection:
pass

@abstractmethod
@override
def _modify(
HammadB marked this conversation as resolved.
Show resolved Hide resolved
self,
id: UUID,
new_name: Optional[str] = None,
new_metadata: Optional[CollectionMetadata] = None,
) -> None:
pass

@abstractmethod
@override
def delete_collection(
self,
name: str,
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> None:
pass
Loading
Loading