diff --git a/chromadb/test/client/test_http_client_v1_compatability.py b/chromadb/test/client/test_http_client_v1_compatability.py index 9e49f4fb61e..983770ee7ad 100644 --- a/chromadb/test/client/test_http_client_v1_compatability.py +++ b/chromadb/test/client/test_http_client_v1_compatability.py @@ -1,6 +1,6 @@ import multiprocessing from unittest.mock import patch - +import sys as pysys from multiprocessing.connection import Connection from chromadb.config import System @@ -41,7 +41,16 @@ def test_http_client_bw_compatibility() -> None: port = sys.settings.chroma_server_http_port old_version = "0.5.11" # Module with known v1 client - install_version(old_version) + + # Version <3.9 requires bounding tokenizers<=0.20.3 + # TOOD(hammadb): This code is duplicated in test_cross_version_persist.py + # for expediency on 11/27/2024 I am copy pasting rather than refactoring + # to DRY. Refactor later. + (major, minor, _) = pysys.version_info[:3] + if major == 3 and minor < 9: + install_version(old_version, {"tokenizers": "<=0.20.3"}) + else: + install_version(old_version, {}) ctx = multiprocessing.get_context("spawn") conn1, conn2 = multiprocessing.Pipe() diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 1a2510c5f05..677426a2082 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -22,6 +22,7 @@ import chromadb.test.property.invariants as invariants from packaging import version as packaging_version import re +import sys import multiprocessing from chromadb.config import Settings from chromadb.api.client import Client as ClientCreator @@ -38,7 +39,7 @@ version_re = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$") # Some modules do not work across versions, since we upgrade our support for them, and should be explicitly reimported in the subprocess -VERSIONED_MODULES = ["pydantic", "numpy"] +VERSIONED_MODULES = ["pydantic", "numpy", "tokenizers"] def versions() -> List[str]: @@ -148,7 +149,14 @@ def configurations(versions: List[str]) -> List[Tuple[str, Settings]]: def version_settings(request) -> Generator[Tuple[str, Settings], None, None]: configuration = request.param version = configuration[0] - install_version(version) + + # Version <3.9 requires bounding tokenizers<=0.20.3 + (major, minor, patch) = sys.version_info[:3] + if major == 3 and minor < 9: + install_version(version, {"tokenizers": "<=0.20.3"}) + else: + install_version(version, {}) + yield configuration # Cleanup the installed version path = get_path_to_version_install(version) diff --git a/chromadb/test/utils/cross_version.py b/chromadb/test/utils/cross_version.py index 737b7f8da97..6480803f2fa 100644 --- a/chromadb/test/utils/cross_version.py +++ b/chromadb/test/utils/cross_version.py @@ -3,7 +3,7 @@ import os import tempfile from types import ModuleType -from typing import List +from typing import Dict, List base_install_dir = tempfile.gettempdir() + "/persistence_test_chromadb_versions" @@ -38,16 +38,16 @@ def get_path_to_version_library(version: str) -> str: return get_path_to_version_install(version) + "/chromadb/__init__.py" -def install_version(version: str) -> None: +def install_version(version: str, dep_overrides: Dict[str, str]) -> None: # Check if already installed version_library = get_path_to_version_library(version) if os.path.exists(version_library): return path = get_path_to_version_install(version) - install(f"chromadb=={version}", path) + install(f"chromadb=={version}", path, dep_overrides) -def install(pkg: str, path: str) -> int: +def install(pkg: str, path: str, dep_overrides: Dict[str, str]) -> int: # -q -q to suppress pip output to ERROR level # https://pip.pypa.io/en/stable/cli/pip/#quiet print("Purging pip cache") @@ -60,17 +60,14 @@ def install(pkg: str, path: str) -> int: "purge", ] ) + + command = [sys.executable, "-m", "pip", "-q", "-q", "install", pkg] + + for dep, operator_version in dep_overrides.items(): + command.append(f"{dep}{operator_version}") + + command.append("--no-binary=chroma-hnswlib") + command.append(f"--target={path}") + print(f"Installing chromadb version {pkg} to {path}") - return subprocess.check_call( - [ - sys.executable, - "-m", - "pip", - "-q", - "-q", - "install", - pkg, - "--no-binary=chroma-hnswlib", - "--target={}".format(path), - ] - ) + return subprocess.check_call(command) diff --git a/pyproject.toml b/pyproject.toml index 140a519f083..9fb9759dabd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ 'opentelemetry-exporter-otlp-proto-grpc>=1.2.0', 'opentelemetry-instrumentation-fastapi>=0.41b0', 'opentelemetry-sdk>=1.2.0', - 'tokenizers >= 0.13.2', + 'tokenizers >= 0.13.2, <= 0.20.3', 'pypika >= 0.48.9', 'tqdm >= 4.65.0', 'overrides >= 7.3.1', diff --git a/requirements.txt b/requirements.txt index b7b621faf2a..19b079af0eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ pypika>=0.48.9 PyYAML>=6.0.0 rich>=10.11.0 tenacity>=8.2.3 -tokenizers>=0.13.2 +tokenizers>=0.13.2,<=0.20.3 tqdm>=4.65.0 typer>=0.9.0 typing_extensions>=4.5.0