From 69cd2ba8ac7915bbba80245dc0fdb21cfe327d8e Mon Sep 17 00:00:00 2001 From: Anton Troynikov Date: Tue, 7 Nov 2023 10:00:00 -0800 Subject: [PATCH] Revert "[ENH] Multimodal Embeddings" (#1344) Reverts chroma-core/chroma#1293 --- chromadb/api/__init__.py | 29 +-- chromadb/api/fastapi.py | 39 ++-- chromadb/api/models/Collection.py | 220 ++++++------------ chromadb/api/types.py | 117 ++-------- chromadb/test/conftest.py | 1 - chromadb/test/ef/test_multimodal_ef.py | 152 ------------ chromadb/test/property/strategies.py | 50 ++-- chromadb/test/property/test_add.py | 12 +- .../property/test_cross_version_persist.py | 6 +- chromadb/test/test_api.py | 24 +- chromadb/utils/embedding_functions.py | 130 +++-------- multimodal_ef_example.ipynb | 102 -------- 12 files changed, 186 insertions(+), 696 deletions(-) delete mode 100644 chromadb/test/ef/test_multimodal_ef.py delete mode 100644 multimodal_ef_example.ipynb diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index ab8d22499bc..b68d7a27d10 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -8,7 +8,6 @@ from chromadb.api.types import ( CollectionMetadata, Documents, - Embeddable, EmbeddingFunction, Embeddings, IDs, @@ -59,9 +58,7 @@ def create_collection( self, name: str, metadata: Optional[CollectionMetadata] = None, - embedding_function: Optional[ - EmbeddingFunction[Embeddable] - ] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), get_or_create: bool = False, ) -> Collection: """Create a new collection with the given name and metadata. @@ -93,11 +90,9 @@ def create_collection( @abstractmethod def get_collection( self, - name: str, + name: Optional[str] = None, id: Optional[UUID] = None, - embedding_function: Optional[ - EmbeddingFunction[Embeddable] - ] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), ) -> Collection: """Get a collection with the given name. Args: @@ -124,9 +119,7 @@ def get_or_create_collection( self, name: str, metadata: Optional[CollectionMetadata] = None, - embedding_function: Optional[ - EmbeddingFunction[Embeddable] - ] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), ) -> Collection: """Get or create a collection with the given name and metadata. 
Args: @@ -493,9 +486,7 @@ def create_collection( self, name: str, metadata: Optional[CollectionMetadata] = None, - embedding_function: Optional[ - EmbeddingFunction[Embeddable] - ] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), get_or_create: bool = False, tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, @@ -506,11 +497,9 @@ def create_collection( @override def get_collection( self, - name: str, + name: Optional[str] = None, id: Optional[UUID] = None, - embedding_function: Optional[ - EmbeddingFunction[Embeddable] - ] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: @@ -522,9 +511,7 @@ def get_or_create_collection( self, name: str, metadata: Optional[CollectionMetadata] = None, - embedding_function: Optional[ - EmbeddingFunction[Embeddable] - ] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index 6dcaaf84c44..38af7e52a91 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -14,7 +14,6 @@ from chromadb.api.models.Collection import Collection from chromadb.api.types import ( Documents, - Embeddable, Embeddings, EmbeddingFunction, IDs, @@ -220,9 +219,7 @@ def create_collection( self, name: str, metadata: Optional[CollectionMetadata] = None, - embedding_function: Optional[ - EmbeddingFunction[Embeddable] - ] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), get_or_create: bool = False, tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, @@ -253,9 +250,9 @@ def create_collection( @override def get_collection( self, - name: str, + name: Optional[str] = None, id: Optional[UUID] = None, - embedding_function: Optional[EmbeddingFunction[Embeddable]] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: @@ -287,20 +284,17 @@ def get_or_create_collection( self, name: str, metadata: Optional[CollectionMetadata] = None, - embedding_function: Optional[EmbeddingFunction[Embeddable]] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), tenant: str = DEFAULT_TENANT, database: str = DEFAULT_DATABASE, ) -> Collection: - return cast( - Collection, - self.create_collection( - name, - metadata, - embedding_function, - get_or_create=True, - tenant=tenant, - database=database, - ), + return self.create_collection( + name, + metadata, + embedding_function, + get_or_create=True, + tenant=tenant, + database=database, ) @trace_method("FastAPI._modify", OpenTelemetryGranularity.OPERATION) @@ -353,13 +347,10 @@ def _peek( collection_id: UUID, n: int = 10, ) -> GetResult: - return cast( - GetResult, - self._get( - collection_id, - limit=n, - include=["embeddings", "documents", "metadatas"], - ), + return self._get( + collection_id, + limit=n, + include=["embeddings", "documents", "metadatas"], ) @trace_method("FastAPI._get", OpenTelemetryGranularity.OPERATION) diff --git 
a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index 058c9c86f8f..ef7c66139d2 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Optional, Tuple, Any +from typing import TYPE_CHECKING, Optional, Tuple, cast, List from pydantic import BaseModel, PrivateAttr from uuid import UUID @@ -7,15 +7,9 @@ from chromadb.api.types import ( CollectionMetadata, Embedding, - Embeddings, - Embeddable, Include, Metadata, - Metadatas, Document, - Documents, - Image, - Images, Where, IDs, EmbeddingFunction, @@ -24,11 +18,7 @@ ID, OneOrMany, WhereDocument, - maybe_cast_one_to_many_ids, - maybe_cast_one_to_many_embedding, - maybe_cast_one_to_many_metadata, - maybe_cast_one_to_many_document, - maybe_cast_one_to_many_image, + maybe_cast_one_to_many, validate_ids, validate_include, validate_metadata, @@ -37,7 +27,6 @@ validate_where_document, validate_n_results, validate_embeddings, - validate_embedding_function, ) import logging @@ -54,16 +43,14 @@ class Collection(BaseModel): tenant: Optional[str] = None database: Optional[str] = None _client: "ServerAPI" = PrivateAttr() - _embedding_function: Optional[EmbeddingFunction[Embeddable]] = PrivateAttr() + _embedding_function: Optional[EmbeddingFunction] = PrivateAttr() def __init__( self, client: "ServerAPI", name: str, id: UUID, - embedding_function: Optional[ - EmbeddingFunction[Embeddable] - ] = ef.DefaultEmbeddingFunction(), # type: ignore + embedding_function: Optional[EmbeddingFunction] = ef.DefaultEmbeddingFunction(), tenant: Optional[str] = None, database: Optional[str] = None, metadata: Optional[CollectionMetadata] = None, @@ -72,11 +59,6 @@ def __init__( name=name, metadata=metadata, id=id, tenant=tenant, database=database ) self._client = client - - # Check to make sure the embedding function has the right signature, as defined by the EmbeddingFunction protocol - if embedding_function is not None: - validate_embedding_function(embedding_function) - self._embedding_function = embedding_function def __repr__(self) -> str: @@ -97,15 +79,13 @@ def add( embeddings: Optional[OneOrMany[Embedding]] = None, metadatas: Optional[OneOrMany[Metadata]] = None, documents: Optional[OneOrMany[Document]] = None, - images: Optional[OneOrMany[Image]] = None, ) -> None: """Add embeddings to the data store. Args: ids: The ids of the embeddings you wish to add - embeddings: The embeddings to add. If None, embeddings will be computed based on the documents or images using the embedding_function set for the Collection. Optional. + embeddings: The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. documents: The documents to associate with the embeddings. Optional. - images: The images to associate with the embeddings. Optional. 
Returns: None @@ -119,22 +99,10 @@ def add( """ - ids, embeddings, metadatas, documents, images = self._validate_embedding_set( - ids, embeddings, metadatas, documents, images + ids, embeddings, metadatas, documents = self._validate_embedding_set( + ids, embeddings, metadatas, documents ) - # We need to compute the embeddings if they're not provided - if embeddings is None: - # At this point, we know that one of documents or images are provided from the validation above - if documents is not None: - embeddings = self._embed(input=documents) - elif images is not None: - embeddings = self._embed(input=images) - else: - raise ValueError( - "You must provide embeddings, documents, or images, or an embedding function." - ) - self._client._add(ids, self.id, embeddings, metadatas, documents) def get( @@ -165,7 +133,7 @@ def get( where_document = ( validate_where_document(where_document) if where_document else None ) - ids = validate_ids(maybe_cast_one_to_many_ids(ids)) if ids else None + ids = validate_ids(maybe_cast_one_to_many(ids)) if ids else None include = validate_include(include, allow_distances=False) return self._client._get( self.id, @@ -193,7 +161,6 @@ def query( self, query_embeddings: Optional[OneOrMany[Embedding]] = None, query_texts: Optional[OneOrMany[Document]] = None, - query_images: Optional[OneOrMany[Image]] = None, n_results: int = 10, where: Optional[Where] = None, where_document: Optional[WhereDocument] = None, @@ -204,7 +171,6 @@ def query( Args: query_embeddings: The embeddings to get the closest neighbors of. Optional. query_texts: The document texts to get the closest neighbors of. Optional. - query_images: The images to get the closest neighbors of. Optional. n_results: The number of neighbors to return for each query_embedding or query_texts. Optional. where: A Where type dict used to filter results by. E.g. `{"$and": ["color" : "red", "price": {"$gte": 4.20}]}`. Optional. where_document: A WhereDocument type dict used to filter by the documents. E.g. `{$contains: {"text": "hello"}}`. Optional. @@ -214,58 +180,43 @@ def query( QueryResult: A QueryResult object containing the results. Raises: - ValueError: If you don't provide either query_embeddings, query_texts, or query_images + ValueError: If you don't provide either query_embeddings or query_texts ValueError: If you provide both query_embeddings and query_texts - ValueError: If you provide both query_embeddings and query_images - ValueError: If you provide both query_texts and query_images """ - # If neither query_embeddings nor query_texts are provided, or both are provided, raise an error - if ( - (query_embeddings is None and query_texts is None and query_images is None) - or ( - query_embeddings is not None - and (query_texts is not None or query_images is not None) - ) - or (query_texts is not None and query_images is not None) - ): - raise ValueError( - "You must provide either query embeddings, or else one of query texts or query images."
- ) - where = validate_where(where) if where else None where_document = ( validate_where_document(where_document) if where_document else None ) query_embeddings = ( - validate_embeddings(maybe_cast_one_to_many_embedding(query_embeddings)) + validate_embeddings(maybe_cast_one_to_many(query_embeddings)) if query_embeddings is not None else None ) query_texts = ( - maybe_cast_one_to_many_document(query_texts) - if query_texts is not None - else None - ) - query_images = ( - maybe_cast_one_to_many_image(query_images) - if query_images is not None - else None + maybe_cast_one_to_many(query_texts) if query_texts is not None else None ) include = validate_include(include, allow_distances=True) n_results = validate_n_results(n_results) - # If query_embeddings are not provided, we need to compute them from the inputs + # If neither query_embeddings nor query_texts are provided, or both are provided, raise an error + if (query_embeddings is None and query_texts is None) or ( + query_embeddings is not None and query_texts is not None + ): + raise ValueError( + "You must provide either query embeddings or query texts, but not both" + ) + + # If query_embeddings are not provided, we need to compute them from the query_texts if query_embeddings is None: - # At this point, we know that one of query_texts or query_images are provided from the validation above - if query_texts is not None: - query_embeddings = self._embed(input=query_texts) - elif query_images is not None: - query_embeddings = self._embed(input=query_images) - else: + if self._embedding_function is None: raise ValueError( - "You must provide either query embeddings, or else one of query texts or query images." + "You must provide embeddings or a function to compute them" ) + # We know query texts is not None at this point, cast for the typechecker + query_embeddings = self._embedding_function( + cast(List[Document], query_texts) ) if where is None: where = {} @@ -309,35 +260,23 @@ def update( embeddings: Optional[OneOrMany[Embedding]] = None, metadatas: Optional[OneOrMany[Metadata]] = None, documents: Optional[OneOrMany[Document]] = None, - images: Optional[OneOrMany[Image]] = None, ) -> None: """Update the embeddings, metadatas or documents for provided ids. Args: ids: The ids of the embeddings to update - embeddings: The embeddings to update. If None, embeddings will be computed based on the documents or images using the embedding_function set for the Collection. Optional. + embeddings: The embeddings to update. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. documents: The documents to associate with the embeddings. Optional. - images: The images to associate with the embeddings. Optional.
+ Returns: None """ - ids, embeddings, metadatas, documents, images = self._validate_embedding_set( - ids, - embeddings, - metadatas, - documents, - images, - require_embeddings_or_data=False, + ids, embeddings, metadatas, documents = self._validate_embedding_set( - ids, embeddings, metadatas, documents, require_embeddings_or_documents=False ) - if embeddings is None: - if documents is not None: - embeddings = self._embed(input=documents) - elif images is not None: - embeddings = self._embed(input=images) - self._client._update(self.id, ids, embeddings, metadatas, documents) def upsert( @@ -346,7 +285,6 @@ def upsert( embeddings: Optional[OneOrMany[Embedding]] = None, metadatas: Optional[OneOrMany[Metadata]] = None, documents: Optional[OneOrMany[Document]] = None, - images: Optional[OneOrMany[Image]] = None, ) -> None: """Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist. @@ -360,16 +298,10 @@ def upsert( None """ - ids, embeddings, metadatas, documents, images = self._validate_embedding_set( - ids, embeddings, metadatas, documents, images + ids, embeddings, metadatas, documents = self._validate_embedding_set( + ids, embeddings, metadatas, documents ) - if embeddings is None: - if documents is not None: - embeddings = self._embed(input=documents) - else: - embeddings = self._embed(input=images) - self._client._upsert( collection_id=self.id, ids=ids, @@ -397,7 +329,7 @@ def delete( Raises: ValueError: If you don't provide either ids, where, or where_document """ - ids = validate_ids(maybe_cast_one_to_many_ids(ids)) if ids else None + ids = validate_ids(maybe_cast_one_to_many(ids)) if ids else None where = validate_where(where) if where else None where_document = ( validate_where_document(where_document) if where_document else None ) @@ -411,74 +343,58 @@ def _validate_embedding_set( embeddings: Optional[OneOrMany[Embedding]], metadatas: Optional[OneOrMany[Metadata]], documents: Optional[OneOrMany[Document]], - images: Optional[OneOrMany[Image]] = None, - require_embeddings_or_data: bool = True, + require_embeddings_or_documents: bool = True, ) -> Tuple[ IDs, - Optional[Embeddings], - Optional[Metadatas], - Optional[Documents], - Optional[Images], + List[Embedding], + Optional[List[Metadata]], + Optional[List[Document]], ]: - valid_ids = validate_ids(maybe_cast_one_to_many_ids(ids)) - valid_embeddings = ( - validate_embeddings(maybe_cast_one_to_many_embedding(embeddings)) + ids = validate_ids(maybe_cast_one_to_many(ids)) + embeddings = ( + validate_embeddings(maybe_cast_one_to_many(embeddings)) if embeddings is not None else None ) - valid_metadatas = ( - validate_metadatas(maybe_cast_one_to_many_metadata(metadatas)) + metadatas = ( + validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas is not None else None ) - valid_documents = ( - maybe_cast_one_to_many_document(documents) - if documents is not None - else None - ) - valid_images = ( - maybe_cast_one_to_many_image(images) if images is not None else None - ) + documents = maybe_cast_one_to_many(documents) if documents is not None else None - # Check that one of embeddings or documents or images is provided - if require_embeddings_or_data: - if ( - valid_embeddings is None - and valid_documents is None - and valid_images is None - ): - raise ValueError("You must provide embeddings, documents, or images.") - - # Only one of documents or images can be provided - if valid_documents is not None and valid_images is not None: - raise ValueError("You can only provide documents or images,
not both.") + # Check that one of embeddings or documents is provided + if require_embeddings_or_documents: + if embeddings is None and documents is None: + raise ValueError( + "You must provide either embeddings or documents, or both" + ) # Check that, if they're provided, the lengths of the arrays match the length of ids - if valid_embeddings is not None and len(valid_embeddings) != len(valid_ids): + if embeddings is not None and len(embeddings) != len(ids): raise ValueError( - f"Number of embeddings {len(valid_embeddings)} must match number of ids {len(valid_ids)}" + f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" ) - if valid_metadatas is not None and len(valid_metadatas) != len(valid_ids): + if metadatas is not None and len(metadatas) != len(ids): raise ValueError( - f"Number of metadatas {len(valid_metadatas)} must match number of ids {len(valid_ids)}" + f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" ) - if valid_documents is not None and len(valid_documents) != len(valid_ids): + if documents is not None and len(documents) != len(ids): raise ValueError( - f"Number of documents {len(valid_documents)} must match number of ids {len(valid_ids)}" + f"Number of documents {len(documents)} must match number of ids {len(ids)}" ) - return ( - valid_ids, - valid_embeddings, - valid_metadatas, - valid_documents, - valid_images, - ) + # If document embeddings are not provided, we need to compute them + if embeddings is None and documents is not None: + if self._embedding_function is None: + raise ValueError( + "You must provide embeddings or a function to compute them" + ) + embeddings = self._embedding_function(documents) - def _embed(self, input: Any) -> Embeddings: - if self._embedding_function is None: - raise ValueError( - "You must provide an embedding function to compute embeddings." - "https://docs.trychroma.com/embeddings" - ) - return self._embedding_function(input=input) + # if embeddings is None: + # raise ValueError( + # "Something went wrong. 
Embeddings should be computed at this point" + # ) + + return ids, embeddings, metadatas, documents # type: ignore diff --git a/chromadb/api/types.py b/chromadb/api/types.py index 84c55257dcb..017e356ffac 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -1,6 +1,4 @@ -from typing import Optional, Union, TypeVar, List, Dict, Any, Tuple, cast -from numpy.typing import NDArray -import numpy as np +from typing import Optional, Union, Sequence, TypeVar, List, Dict, Any, Tuple from typing_extensions import Literal, TypedDict, Protocol import chromadb.errors as errors from chromadb.types import ( @@ -15,97 +13,27 @@ WhereDocumentOperator, WhereDocument, ) -from inspect import signature # Re-export types from chromadb.types __all__ = ["Metadata", "Where", "WhereDocument", "UpdateCollectionMetadata"] -T = TypeVar("T") -OneOrMany = Union[T, List[T]] - -# IDs ID = str IDs = List[ID] - -def maybe_cast_one_to_many_ids(target: OneOrMany[ID]) -> IDs: - if isinstance(target, str): - # One ID - return cast(IDs, [target]) - # Already a sequence - return cast(IDs, target) - - -# Embeddings Embedding = Vector Embeddings = List[Embedding] - -def maybe_cast_one_to_many_embedding(target: OneOrMany[Embedding]) -> Embeddings: - if isinstance(target, List): - # One Embedding - if isinstance(target[0], (int, float)): - return cast(Embeddings, [target]) - # Already a sequence - return cast(Embeddings, target) - - -# Metadatas Metadatas = List[Metadata] - -def maybe_cast_one_to_many_metadata(target: OneOrMany[Metadata]) -> Metadatas: - # One Metadata dict - if isinstance(target, dict): - return cast(Metadatas, [target]) - # Already a sequence - return cast(Metadatas, target) - - CollectionMetadata = Dict[str, Any] UpdateCollectionMetadata = UpdateMetadata -# Documents Document = str Documents = List[Document] - -def is_document(target: Any) -> bool: - if not isinstance(target, str): - return False - return True - - -def maybe_cast_one_to_many_document(target: OneOrMany[Document]) -> Documents: - # One Document - if is_document(target): - return cast(Documents, [target]) - # Already a sequence - return cast(Documents, target) - - -# Images -ImageDType = Union[np.uint, np.int_, np.float_] -Image = NDArray[ImageDType] -Images = List[Image] - - -def is_image(target: Any) -> bool: - if not isinstance(target, np.ndarray): - return False - if len(target.shape) < 2: - return False - return True - - -def maybe_cast_one_to_many_image(target: OneOrMany[Image]) -> Images: - if is_image(target): - return cast(Images, [target]) - # Already a sequence - return cast(Images, target) - - -Parameter = TypeVar("Parameter", Document, Image, Embedding, Metadata, ID) +Parameter = TypeVar("Parameter", Embedding, Document, Metadata, ID) +T = TypeVar("T") +OneOrMany = Union[T, List[T]] # This should just be List[Literal["documents", "embeddings", "metadatas", "distances"]] # However, this provokes an incompatibility with the Overrides library and Python 3.7 @@ -153,29 +81,28 @@ class IndexMetadata(TypedDict): time_created: float -Embeddable = Union[Documents, Images] -D = TypeVar("D", bound=Embeddable, contravariant=True) - - -class EmbeddingFunction(Protocol[D]): - def __call__(self, input: D) -> Embeddings: +class EmbeddingFunction(Protocol): + def __call__(self, texts: Documents) -> Embeddings: ...
-def validate_embedding_function( - embedding_function: EmbeddingFunction[Embeddable], -) -> None: - function_signature = signature( - embedding_function.__class__.__call__ - ).parameters.keys() - protocol_signature = signature(EmbeddingFunction.__call__).parameters.keys() +def maybe_cast_one_to_many( + target: OneOrMany[Parameter], +) -> List[Parameter]: + """Infers if target is Embedding, Metadata, or Document and casts it to a many object if it's one""" - if not function_signature == protocol_signature: - raise ValueError( - f"Expected EmbeddingFunction.__call__ to have the following signature: {protocol_signature}, got {function_signature}\n" - "Please see https://docs.trychroma.com/embeddings for details of the EmbeddingFunction interface.\n" - "Please note the recent change to the EmbeddingFunction interface: https://docs.trychroma.com/migration#migration-to-0416---november-7-2023 \n" - ) + if isinstance(target, Sequence): + # One Document or ID + if isinstance(target, str) and target is not None: + return [target] + # One Embedding + if isinstance(target[0], (int, float)): + return [target] # type: ignore + # One Metadata dict + if isinstance(target, dict): + return [target] + # Already a sequence + return target # type: ignore def validate_ids(ids: IDs) -> IDs: diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 087cb2271bd..401139684ab 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -429,7 +429,6 @@ def client(system: System) -> Generator[ClientAPI, None, None]: system.reset_state() client = ClientCreator.from_system(system) yield client - client.clear_system_cache() @pytest.fixture(scope="function") diff --git a/chromadb/test/ef/test_multimodal_ef.py b/chromadb/test/ef/test_multimodal_ef.py deleted file mode 100644 index 52213c77a4c..00000000000 --- a/chromadb/test/ef/test_multimodal_ef.py +++ /dev/null @@ -1,152 +0,0 @@ -from typing import Generator, cast -import numpy as np -import pytest -import chromadb -from chromadb.api.types import ( - Embeddable, - EmbeddingFunction, - Embeddings, - Image, - Document, -) -from chromadb.test.property.strategies import hashing_embedding_function -from chromadb.test.property.invariants import _exact_distances - - -# A 'standard' multimodal embedding function, which converts inputs to strings -# then hashes them to a fixed dimension.
-class hashing_multimodal_ef(EmbeddingFunction[Embeddable]): - def __init__(self) -> None: - self._hef = hashing_embedding_function(dim=10, dtype=np.float_) - - def __call__(self, input: Embeddable) -> Embeddings: - to_texts = [str(i) for i in input] - embeddings = np.array(self._hef(to_texts)) - # Normalize the embeddings - # This is so we can generate random unit vectors and have them be close to the embeddings - embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True) - return cast(Embeddings, embeddings.tolist()) - - -def random_image() -> Image: - return np.random.randint(0, 255, size=(10, 10, 3), dtype=np.int32) - - -def random_document() -> Document: - return str(random_image()) - - -@pytest.fixture -def multimodal_collection( - default_ef: EmbeddingFunction[Embeddable] = hashing_multimodal_ef(), -) -> Generator[chromadb.Collection, None, None]: - client = chromadb.Client() - collection = client.create_collection( - name="multimodal_collection", embedding_function=default_ef - ) - yield collection - client.clear_system_cache() - - -# Test adding and querying of a multimodal collection consisting of images and documents -def test_multimodal( - multimodal_collection: chromadb.Collection, - default_ef: EmbeddingFunction[Embeddable] = hashing_multimodal_ef(), - n_examples: int = 10, - n_query_results: int = 3, -) -> None: - image_ids = [str(i) for i in range(n_examples)] - images = [random_image() for _ in range(n_examples)] - image_embeddings = default_ef(images) - - document_ids = [str(i) for i in range(n_examples, 2 * n_examples)] - documents = [random_document() for _ in range(n_examples)] - document_embeddings = default_ef(documents) - - # Trying to add a document and an image at the same time should fail - with pytest.raises( - ValueError, match="You can only provide documents or images, not both." 
- ): - multimodal_collection.add( - ids=image_ids[0], documents=documents[0], images=images[0] - ) - - # Add some documents - multimodal_collection.add(ids=document_ids, documents=documents) - # Add some images - multimodal_collection.add(ids=image_ids, images=images) - - # get() should return all the documents and images - # ids corresponding to images should not have documents - get_result = multimodal_collection.get(include=["documents"]) - assert len(get_result["ids"]) == len(document_ids) + len(image_ids) - for i, id in enumerate(get_result["ids"]): - assert id in document_ids or id in image_ids - assert get_result["documents"] is not None - if id in document_ids: - assert get_result["documents"][i] == documents[document_ids.index(id)] - if id in image_ids: - assert get_result["documents"][i] is None - - # Generate a random query image - query_image = random_image() - query_image_embedding = default_ef([query_image]) - - image_neighbor_indices, _ = _exact_distances( - query_image_embedding, image_embeddings + document_embeddings - ) - # Get the ids of the nearest neighbors - nearest_image_neighbor_ids = [ - image_ids[i] if i < n_examples else document_ids[i % n_examples] - for i in image_neighbor_indices[0][:n_query_results] - ] - - # Generate a random query document - query_document = random_document() - query_document_embedding = default_ef([query_document]) - document_neighbor_indices, _ = _exact_distances( - query_document_embedding, image_embeddings + document_embeddings - ) - nearest_document_neighbor_ids = [ - image_ids[i] if i < n_examples else document_ids[i % n_examples] - for i in document_neighbor_indices[0][:n_query_results] - ] - - # Querying with both images and documents should fail - with pytest.raises(ValueError): - multimodal_collection.query( - query_images=[query_image], query_texts=[query_document] - ) - - # Query with images - query_result = multimodal_collection.query( - query_images=[query_image], n_results=n_query_results, include=["documents"] - ) - - assert query_result["ids"][0] == nearest_image_neighbor_ids - - # Query with documents - query_result = multimodal_collection.query( - query_texts=[query_document], n_results=n_query_results, include=["documents"] - ) - - assert query_result["ids"][0] == nearest_document_neighbor_ids - - -@pytest.mark.xfail -def test_multimodal_update_with_image( - multimodal_collection: chromadb.Collection, -) -> None: - # Updating an entry with an existing document should remove the document - - document = random_document() - image = random_image() - id = "0" - - multimodal_collection.add(ids=id, documents=document) - - multimodal_collection.update(ids=id, images=image) - - get_result = multimodal_collection.get(ids=id, include=["documents"]) - assert get_result["documents"] is not None - assert get_result["documents"][0] is None diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 142fbc8b3f2..3583dadfba9 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -1,7 +1,7 @@ import hashlib import hypothesis import hypothesis.strategies as st -from typing import Any, Optional, List, Dict, Union, cast +from typing import Any, Optional, List, Dict, Union from typing_extensions import TypedDict import numpy as np import numpy.typing as npt @@ -13,14 +13,8 @@ from dataclasses import dataclass -from chromadb.api.types import ( - Documents, - Embeddable, - EmbeddingFunction, - Embeddings, - Metadata, -) -from chromadb.types import LiteralValue,
WhereOperator, LogicalOperator +from chromadb.api.types import Documents, Embeddings, Metadata +from chromadb.types import LiteralValue # Set the random seed for reproducibility np.random.seed(0) # unnecessary, hypothesis does this for us @@ -184,15 +178,15 @@ def create_embeddings( return embeddings -class hashing_embedding_function(types.EmbeddingFunction[Documents]): +class hashing_embedding_function(types.EmbeddingFunction): def __init__(self, dim: int, dtype: npt.DTypeLike) -> None: self.dim = dim self.dtype = dtype - def __call__(self, input: types.Documents) -> types.Embeddings: + def __call__(self, texts: types.Documents) -> types.Embeddings: # Hash the texts and convert to hex strings hashed_texts = [ - list(hashlib.sha256(text.encode("utf-8")).hexdigest()) for text in input + list(hashlib.sha256(text.encode("utf-8")).hexdigest()) for text in texts ] # Pad with repetition, or truncate the hex strings to the desired dimension padded_texts = [ @@ -209,17 +203,15 @@ def __call__(self, input: types.Documents) -> types.Embeddings: return embeddings -class not_implemented_embedding_function(types.EmbeddingFunction[Documents]): - def __call__(self, input: Documents) -> Embeddings: +class not_implemented_embedding_function(types.EmbeddingFunction): + def __call__(self, texts: Documents) -> Embeddings: assert False, "This embedding function is not implemented" def embedding_function_strategy( dim: int, dtype: npt.DTypeLike -) -> st.SearchStrategy[types.EmbeddingFunction[Embeddable]]: - return st.just( - cast(EmbeddingFunction[Embeddable], hashing_embedding_function(dim, dtype)) - ) +) -> st.SearchStrategy[types.EmbeddingFunction]: + return st.just(hashing_embedding_function(dim, dtype)) @dataclass @@ -232,7 +224,7 @@ class Collection: known_document_keywords: List[str] has_documents: bool = False has_embeddings: bool = False - embedding_function: Optional[types.EmbeddingFunction[Embeddable]] = None + embedding_function: Optional[types.EmbeddingFunction] = None @st.composite @@ -319,12 +311,12 @@ def metadata(draw: st.DrawFn, collection: Collection) -> types.Metadata: if collection.known_metadata_keys: for key in collection.known_metadata_keys.keys(): if key in metadata: - del metadata[key] # type: ignore + del metadata[key] # Finally, add in some of the known keys for the collection sampling_dict: Dict[str, st.SearchStrategy[Union[str, int, float]]] = { k: st.just(v) for k, v in collection.known_metadata_keys.items() } - metadata.update(draw(st.fixed_dictionaries({}, optional=sampling_dict))) # type: ignore + metadata.update(draw(st.fixed_dictionaries({}, optional=sampling_dict))) return metadata @@ -340,11 +332,11 @@ def document(draw: st.DrawFn, collection: Collection) -> types.Document: else: known_words_st = st.text( min_size=1, - alphabet=st.characters(blacklist_categories=blacklist_categories), # type: ignore + alphabet=st.characters(blacklist_categories=blacklist_categories), ) random_words_st = st.text( - min_size=1, alphabet=st.characters(blacklist_categories=blacklist_categories) # type: ignore + min_size=1, alphabet=st.characters(blacklist_categories=blacklist_categories) ) words = draw(st.lists(st.one_of(known_words_st, random_words_st), min_size=1)) return " ".join(words) @@ -495,20 +487,20 @@ def where_clause(draw: st.DrawFn, collection: Collection) -> types.Where: # Add or subtract a small number to avoid floating point rounding errors value = value + draw(st.sampled_from([1e-6, -1e-6])) - op: WhereOperator = draw(st.sampled_from(legal_ops)) + op: types.WhereOperator = 
draw(st.sampled_from(legal_ops)) if op is None: return {key: value} - elif op == "$in": # type: ignore + elif op == "$in": if isinstance(value, str) and not value: return {} return {key: {op: [value, *[draw(opposite_value(value)) for _ in range(3)]]}} - elif op == "$nin": # type: ignore + elif op == "$nin": if isinstance(value, str) and not value: return {} return {key: {op: [draw(opposite_value(value)) for _ in range(3)]}} else: - return {key: {op: value}} # type: ignore + return {key: {op: value}} @st.composite @@ -524,7 +516,7 @@ def where_doc_clause(draw: st.DrawFn, collection: Collection) -> types.WhereDocu def binary_operator_clause( base_st: SearchStrategy[types.Where], ) -> SearchStrategy[types.Where]: - op: SearchStrategy[LogicalOperator] = st.sampled_from(["$and", "$or"]) + op: SearchStrategy[types.LogicalOperator] = st.sampled_from(["$and", "$or"]) return st.dictionaries( keys=op, values=st.lists(base_st, max_size=2, min_size=2), @@ -536,7 +528,7 @@ def binary_operator_clause( def binary_document_operator_clause( base_st: SearchStrategy[types.WhereDocument], ) -> SearchStrategy[types.WhereDocument]: - op: SearchStrategy[LogicalOperator] = st.sampled_from(["$and", "$or"]) + op: SearchStrategy[types.LogicalOperator] = st.sampled_from(["$and", "$or"]) return st.dictionaries( keys=op, values=st.lists(base_st, max_size=2, min_size=2), diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index f97e33aa305..5f8991b00ed 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -26,7 +26,7 @@ def test_add( # TODO: Generative embedding functions coll = api.create_collection( name=collection.name, - metadata=collection.metadata, # type: ignore + metadata=collection.metadata, embedding_function=collection.embedding_function, ) normalized_record_set = invariants.wrap_all(record_set) @@ -64,7 +64,7 @@ def create_large_recordset( "metadatas": metadatas, "documents": documents, } - return cast(strategies.RecordSet, record_set) + return record_set @given(collection=collection_st) @@ -77,7 +77,7 @@ def test_add_large(api: ServerAPI, collection: strategies.Collection) -> None: ) coll = api.create_collection( name=collection.name, - metadata=collection.metadata, # type: ignore + metadata=collection.metadata, embedding_function=collection.embedding_function, ) normalized_record_set = invariants.wrap_all(record_set) @@ -107,7 +107,7 @@ def test_add_large_exceeding(api: ServerAPI, collection: strategies.Collection) ) coll = api.create_collection( name=collection.name, - metadata=collection.metadata, # type: ignore + metadata=collection.metadata, embedding_function=collection.embedding_function, ) normalized_record_set = invariants.wrap_all(record_set) @@ -157,7 +157,7 @@ def test_out_of_order_ids(api: ServerAPI) -> None: ] coll = api.create_collection( - "test", embedding_function=lambda input: [[1, 2, 3] for _ in input] # type: ignore + "test", embedding_function=lambda texts: [[1, 2, 3] for _ in texts] # type: ignore ) embeddings: Embeddings = [[1, 2, 3] for _ in ooo_ids] coll.add(ids=ooo_ids, embeddings=embeddings) @@ -174,7 +174,7 @@ def test_add_partial(api: ServerAPI) -> None: # TODO: We need to clean up the api types to support this typing coll.add( ids=["1", "2", "3"], - embeddings=[[1, 2, 3], [1, 2, 3], [1, 2, 3]], # type: ignore + embeddings=[[1, 2, 3], [1, 2, 3], [1, 2, 3]], metadatas=[{"a": 1}, None, {"a": 3}], # type: ignore documents=["a", "b", None], # type: ignore ) diff --git 
a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 82bfc5f7cda..b5320dfe7a9 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -204,8 +204,8 @@ def switch_to_version(version: str) -> ModuleType: return chromadb -class not_implemented_ef(EmbeddingFunction[Documents]): - def __call__(self, input: Documents) -> Embeddings: +class not_implemented_ef(EmbeddingFunction): + def __call__(self, texts: Documents) -> Embeddings: assert False, "Embedding function should not be called" @@ -314,7 +314,7 @@ def test_cycle_versions( system.start() coll = api.get_collection( name=collection_strategy.name, - embedding_function=not_implemented_ef(), # type: ignore + embedding_function=not_implemented_ef(), ) invariants.count(coll, embeddings_strategy) invariants.metadatas_match(coll, embeddings_strategy) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index d6d3e3c30a8..ed3c87ee682 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -3,7 +3,7 @@ import chromadb from chromadb.api.fastapi import FastAPI -from chromadb.api.types import QueryResult, EmbeddingFunction, Document +from chromadb.api.types import QueryResult from chromadb.config import Settings import chromadb.server.fastapi import pytest @@ -91,17 +91,14 @@ def test_persist_index_loading(api_fixture, request): @pytest.mark.parametrize("api_fixture", [local_persist_api]) def test_persist_index_loading_embedding_function(api_fixture, request): - class TestEF(EmbeddingFunction[Document]): - def __call__(self, input): - return [[1, 2, 3] for _ in range(len(input))] - + embedding_function = lambda x: [[1, 2, 3] for _ in range(len(x))] # noqa E731 api = request.getfixturevalue("local_persist_api") api.reset() - collection = api.create_collection("test", embedding_function=TestEF()) + collection = api.create_collection("test", embedding_function=embedding_function) collection.add(ids="id1", documents="hello") api2 = request.getfixturevalue("local_persist_api_cache_bust") - collection = api2.get_collection("test", embedding_function=TestEF()) + collection = api2.get_collection("test", embedding_function=embedding_function) nn = collection.query( query_texts="hello", @@ -114,17 +111,18 @@ def __call__(self, input): @pytest.mark.parametrize("api_fixture", [local_persist_api]) def test_persist_index_get_or_create_embedding_function(api_fixture, request): - class TestEF(EmbeddingFunction[Document]): - def __call__(self, input): - return [[1, 2, 3] for _ in range(len(input))] - + embedding_function = lambda x: [[1, 2, 3] for _ in range(len(x))] # noqa E731 api = request.getfixturevalue("local_persist_api") api.reset() - collection = api.get_or_create_collection("test", embedding_function=TestEF()) + collection = api.get_or_create_collection( + "test", embedding_function=embedding_function + ) collection.add(ids="id1", documents="hello") api2 = request.getfixturevalue("local_persist_api_cache_bust") - collection = api2.get_or_create_collection("test", embedding_function=TestEF()) + collection = api2.get_or_create_collection( + "test", embedding_function=embedding_function + ) nn = collection.query( query_texts="hello", diff --git a/chromadb/utils/embedding_functions.py b/chromadb/utils/embedding_functions.py index 5e38936ef6c..aaef53c01e2 100644 --- a/chromadb/utils/embedding_functions.py +++ b/chromadb/utils/embedding_functions.py @@ -1,21 +1,11 @@ import logging -from 
chromadb.api.types import ( - Document, - Documents, - Embedding, - Image, - Images, - EmbeddingFunction, - Embeddings, - is_image, - is_document, -) +from chromadb.api.types import Documents, EmbeddingFunction, Embeddings from pathlib import Path import os import tarfile import requests -from typing import Any, Dict, List, Union, cast +from typing import Any, Dict, List, cast import numpy as np import numpy.typing as npt import importlib @@ -31,7 +21,7 @@ logger = logging.getLogger(__name__) -class SentenceTransformerEmbeddingFunction(EmbeddingFunction[Documents]): +class SentenceTransformerEmbeddingFunction(EmbeddingFunction): # Since we do dynamic imports we have to type this as Any models: Dict[str, Any] = {} @@ -54,15 +44,15 @@ def __init__( self._model = self.models[model_name] self._normalize_embeddings = normalize_embeddings - def __call__(self, input: Documents) -> Embeddings: + def __call__(self, texts: Documents) -> Embeddings: return self._model.encode( # type: ignore - list(input), + list(texts), convert_to_numpy=True, normalize_embeddings=self._normalize_embeddings, ).tolist() -class Text2VecEmbeddingFunction(EmbeddingFunction[Documents]): +class Text2VecEmbeddingFunction(EmbeddingFunction): def __init__(self, model_name: str = "shibing624/text2vec-base-chinese"): try: from text2vec import SentenceModel @@ -72,11 +62,11 @@ def __init__(self, model_name: str = "shibing624/text2vec-base-chinese"): ) self._model = SentenceModel(model_name_or_path=model_name) - def __call__(self, input: Documents) -> Embeddings: - return self._model.encode(list(input), convert_to_numpy=True).tolist() # type: ignore # noqa E501 + def __call__(self, texts: Documents) -> Embeddings: + return self._model.encode(list(texts), convert_to_numpy=True).tolist() # type: ignore # noqa E501 -class OpenAIEmbeddingFunction(EmbeddingFunction[Documents]): +class OpenAIEmbeddingFunction(EmbeddingFunction): def __init__( self, api_key: Optional[str] = None, @@ -135,12 +125,12 @@ def __init__( self._client = openai.Embedding self._model_name = model_name - def __call__(self, input: Documents) -> Embeddings: + def __call__(self, texts: Documents) -> Embeddings: # replace newlines, which can negatively affect performance. - input = [t.replace("\n", " ") for t in input] + texts = [t.replace("\n", " ") for t in texts] # Call the OpenAI Embedding API - embeddings = self._client.create(input=input, engine=self._model_name)["data"] + embeddings = self._client.create(input=texts, engine=self._model_name)["data"] # Sort resulting embeddings by index sorted_embeddings = sorted(embeddings, key=lambda e: e["index"]) # type: ignore @@ -149,7 +139,7 @@ def __call__(self, input: Documents) -> Embeddings: return [result["embedding"] for result in sorted_embeddings] -class CohereEmbeddingFunction(EmbeddingFunction[Documents]): +class CohereEmbeddingFunction(EmbeddingFunction): def __init__(self, api_key: str, model_name: str = "large"): try: import cohere @@ -161,15 +151,15 @@ def __init__(self, api_key: str, model_name: str = "large"): self._client = cohere.Client(api_key) self._model_name = model_name - def __call__(self, input: Documents) -> Embeddings: + def __call__(self, texts: Documents) -> Embeddings: # Call Cohere Embedding API for each document. 
return [ embeddings - for embeddings in self._client.embed(texts=input, model=self._model_name) + for embeddings in self._client.embed(texts=texts, model=self._model_name) ] -class HuggingFaceEmbeddingFunction(EmbeddingFunction[Documents]): +class HuggingFaceEmbeddingFunction(EmbeddingFunction): """ This class is used to get embeddings for a list of texts using the HuggingFace API. It requires an API key and a model name. The default model name is "sentence-transformers/all-MiniLM-L6-v2". @@ -195,7 +185,7 @@ def __init__( self._session = requests.Session() self._session.headers.update({"Authorization": f"Bearer {api_key}"}) - def __call__(self, input: Documents) -> Embeddings: + def __call__(self, texts: Documents) -> Embeddings: """ Get the embeddings for a list of texts. @@ -212,11 +202,11 @@ def __call__(self, input: Documents) -> Embeddings: """ # Call HuggingFace Embedding API for each document return self._session.post( # type: ignore - self._api_url, json={"inputs": input, "options": {"wait_for_model": True}} + self._api_url, json={"inputs": texts, "options": {"wait_for_model": True}} ).json() -class InstructorEmbeddingFunction(EmbeddingFunction[Documents]): +class InstructorEmbeddingFunction(EmbeddingFunction): # If you have a GPU with at least 6GB try model_name = "hkunlp/instructor-xl" and device = "cuda" # for a full list of options: https://github.com/HKUNLP/instructor-embedding#model-list def __init__( @@ -234,11 +224,11 @@ def __init__( self._model = INSTRUCTOR(model_name, device=device) self._instruction = instruction - def __call__(self, input: Documents) -> Embeddings: + def __call__(self, texts: Documents) -> Embeddings: if self._instruction is None: - return self._model.encode(input).tolist() # type: ignore + return self._model.encode(texts).tolist() # type: ignore - texts_with_instructions = [[self._instruction, text] for text in input] + texts_with_instructions = [[self._instruction, text] for text in texts] return self._model.encode(texts_with_instructions).tolist() # type: ignore @@ -247,7 +237,7 @@ def __call__(self, input: Documents) -> Embeddings: # implements the same functionality as "all-MiniLM-L6-v2" from sentence-transformers. # visit https://github.com/chroma-core/onnx-embedding for the source code to generate # and verify the ONNX model. 
-class ONNXMiniLM_L6_V2(EmbeddingFunction[Documents]): +class ONNXMiniLM_L6_V2(EmbeddingFunction): MODEL_NAME = "all-MiniLM-L6-v2" DOWNLOAD_PATH = Path.home() / ".cache" / "chroma" / "onnx_models" / MODEL_NAME EXTRACTED_FOLDER_NAME = "onnx" @@ -384,11 +374,11 @@ def _init_model_and_tokenizer(self) -> None: providers=self._preferred_providers, ) - def __call__(self, input: Documents) -> Embeddings: + def __call__(self, texts: Documents) -> Embeddings: # Only download the model when it is actually used self._download_model_if_not_exists() self._init_model_and_tokenizer() - res = cast(Embeddings, self._forward(input).tolist()) + res = cast(Embeddings, self._forward(texts).tolist()) return res def _download_model_if_not_exists(self) -> None: @@ -423,14 +413,14 @@ def _download_model_if_not_exists(self) -> None: tar.extractall(path=self.DOWNLOAD_PATH) -def DefaultEmbeddingFunction() -> Optional[EmbeddingFunction[Documents]]: +def DefaultEmbeddingFunction() -> Optional[EmbeddingFunction]: if is_thin_client: return None else: return ONNXMiniLM_L6_V2() -class GooglePalmEmbeddingFunction(EmbeddingFunction[Documents]): +class GooglePalmEmbeddingFunction(EmbeddingFunction): """To use this EmbeddingFunction, you must have the google.generativeai Python package installed and have a PaLM API key.""" def __init__(self, api_key: str, model_name: str = "models/embedding-gecko-001"): @@ -451,16 +441,16 @@ def __init__(self, api_key: str, model_name: str = "models/embedding-gecko-001") self._palm = palm self._model_name = model_name - def __call__(self, input: Documents) -> Embeddings: + def __call__(self, texts: Documents) -> Embeddings: return [ self._palm.generate_embeddings(model=self._model_name, text=text)[ "embedding" ] - for text in input + for text in texts ] -class GoogleVertexEmbeddingFunction(EmbeddingFunction[Documents]): +class GoogleVertexEmbeddingFunction(EmbeddingFunction): # Follow API Quickstart for Google Vertex AI # https://cloud.google.com/vertex-ai/docs/generative-ai/start/quickstarts/api-quickstart # Information about the text embedding modules in Google Vertex AI @@ -476,9 +466,9 @@ def __init__( self._session = requests.Session() self._session.headers.update({"Authorization": f"Bearer {api_key}"}) - def __call__(self, input: Documents) -> Embeddings: + def __call__(self, texts: Documents) -> Embeddings: embeddings = [] - for text in input: + for text in texts: response = self._session.post( self._api_url, json={"instances": [{"content": text}]} ).json() @@ -489,62 +479,6 @@ def __call__(self, input: Documents) -> Embeddings: return embeddings -class OpenCLIPEmbeddingFunction(EmbeddingFunction[Union[Documents, Images]]): - def __init__( - self, model_name: str = "ViT-B-32", checkpoint: str = "laion2b_s34b_b79k" - ) -> None: - try: - import open_clip - except ImportError: - raise ValueError( - "The open_clip python package is not installed. Please install it with `pip install open-clip-torch`. https://github.com/mlfoundations/open_clip" - ) - try: - self._torch = importlib.import_module("torch") - except ImportError: - raise ValueError( - "The torch python package is not installed. Please install it with `pip install torch`" - ) - - try: - self._PILImage = importlib.import_module("PIL.Image") - except ImportError: - raise ValueError( - "The PIL python package is not installed. 
Please install it with `pip install pillow`" - ) - - model, _, preprocess = open_clip.create_model_and_transforms( - model_name=model_name, pretrained=checkpoint - ) - self._model = model - self._preprocess = preprocess - self._tokenizer = open_clip.get_tokenizer(model_name=model_name) - - def _encode_image(self, image: Image) -> Embedding: - pil_image = self._PILImage.fromarray(image) - with self._torch.no_grad(): - image_features = self._model.encode_image( - self._preprocess(pil_image).unsqueeze(0) - ) - image_features /= image_features.norm(dim=-1, keepdim=True) - return cast(Embedding, image_features.squeeze().tolist()) - - def _encode_text(self, text: Document) -> Embedding: - with self._torch.no_grad(): - text_features = self._model.encode_text(self._tokenizer(text)) - text_features /= text_features.norm(dim=-1, keepdim=True) - return cast(Embedding, text_features.squeeze().tolist()) - - def __call__(self, input: Union[Documents, Images]) -> Embeddings: - embeddings: Embeddings = [] - for item in input: - if is_image(item): - embeddings.append(self._encode_image(cast(Image, item))) - elif is_document(item): - embeddings.append(self._encode_text(cast(Document, item))) - return embeddings - - # List of all classes in this module _classes = [ name diff --git a/multimodal_ef_example.ipynb b/multimodal_ef_example.ipynb deleted file mode 100644 index 879c04454a5..00000000000 --- a/multimodal_ef_example.ipynb +++ /dev/null @@ -1,102 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import chromadb\n", - "\n", - "client = chromadb.Client()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from chromadb.api.types import Embeddings, Images\n", - "from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction\n", - "\n", - "embedding_function = OpenCLIPEmbeddingFunction()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "collection = client.create_collection('test', embedding_function=embedding_function)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from PIL import Image\n", - "\n", - "image = np.array(Image.open('test_img.jpeg'))" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "collection.add(ids='a', images=image)\n", - "collection.add(ids='b', documents='hello world')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'ids': ['a', 'b'],\n", - " 'embeddings': None,\n", - " 'metadatas': None,\n", - " 'documents': [None, 'hello world']}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "collection.get(include=['documents'])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "chroma", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}
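For reference, the embedding function interface this revert restores is the non-generic Protocol shown in the chromadb/api/types.py hunk above: __call__(self, texts: Documents) -> Embeddings. A minimal sketch of a custom embedding function against that restored signature follows; the class name and the toy length-based vectors are illustrative assumptions, not part of the patch:

    from chromadb.api.types import Documents, EmbeddingFunction, Embeddings

    class ToyEmbeddingFunction(EmbeddingFunction):
        # Hypothetical example: any callable matching the restored
        # __call__(self, texts: Documents) -> Embeddings protocol works.
        def __call__(self, texts: Documents) -> Embeddings:
            # One fixed-size vector per input document; a real
            # implementation would run a model here.
            return [[float(len(t)), 0.0, 0.0] for t in texts]

An instance can be passed as embedding_function= to create_collection / get_collection, just as the updated tests in chromadb/test/test_api.py do with a plain lambda.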
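Likewise, a minimal sketch of the text-only client flow the revert returns to; the collection name and documents are illustrative. The images= and query_images= keyword arguments removed by this patch are no longer accepted, and embeddings are computed from documents by the collection's embedding function:

    import chromadb

    client = chromadb.Client()
    # Uses DefaultEmbeddingFunction() unless embedding_function= is given.
    collection = client.create_collection("texts_only")

    # Embeddings are computed from the documents, as in Collection.add above.
    collection.add(ids=["id1", "id2"], documents=["hello world", "goodbye world"])

    # query_texts is the only non-embedding query input after this revert.
    result = collection.query(query_texts=["hello"], n_results=1)
    print(result["ids"])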