From 56a6a16bc71edb281156cfec18dec2361a8ae4cd Mon Sep 17 00:00:00 2001 From: hammadb Date: Wed, 27 Nov 2024 10:53:27 -0800 Subject: [PATCH] [BUG] Bound tokenizers, patch CVP test --- .../property/test_cross_version_persist.py | 12 ++++++++-- chromadb/test/utils/cross_version.py | 23 +++++++++++++++---- pyproject.toml | 2 +- requirements.txt | 2 +- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 1a2510c5f05..677426a2082 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -22,6 +22,7 @@ import chromadb.test.property.invariants as invariants from packaging import version as packaging_version import re +import sys import multiprocessing from chromadb.config import Settings from chromadb.api.client import Client as ClientCreator @@ -38,7 +39,7 @@ version_re = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$") # Some modules do not work across versions, since we upgrade our support for them, and should be explicitly reimported in the subprocess -VERSIONED_MODULES = ["pydantic", "numpy"] +VERSIONED_MODULES = ["pydantic", "numpy", "tokenizers"] def versions() -> List[str]: @@ -148,7 +149,14 @@ def configurations(versions: List[str]) -> List[Tuple[str, Settings]]: def version_settings(request) -> Generator[Tuple[str, Settings], None, None]: configuration = request.param version = configuration[0] - install_version(version) + + # Version <3.9 requires bounding tokenizers<=0.20.3 + (major, minor, patch) = sys.version_info[:3] + if major == 3 and minor < 9: + install_version(version, {"tokenizers": "<=0.20.3"}) + else: + install_version(version, {}) + yield configuration # Cleanup the installed version path = get_path_to_version_install(version) diff --git a/chromadb/test/utils/cross_version.py b/chromadb/test/utils/cross_version.py index 737b7f8da97..287aa154b91 100644 --- 
a/chromadb/test/utils/cross_version.py +++ b/chromadb/test/utils/cross_version.py @@ -3,7 +3,7 @@ import os import tempfile from types import ModuleType -from typing import List +from typing import Dict, List base_install_dir = tempfile.gettempdir() + "/persistence_test_chromadb_versions" @@ -38,16 +38,16 @@ def get_path_to_version_library(version: str) -> str: return get_path_to_version_install(version) + "/chromadb/__init__.py" -def install_version(version: str) -> None: +def install_version(version: str, dep_overrides: Dict[str, str]) -> None: # Check if already installed version_library = get_path_to_version_library(version) if os.path.exists(version_library): return path = get_path_to_version_install(version) - install(f"chromadb=={version}", path) + install(f"chromadb=={version}", path, dep_overrides) -def install(pkg: str, path: str) -> int: +def install(pkg: str, path: str, dep_overrides: Dict[str, str]) -> int: # -q -q to suppress pip output to ERROR level # https://pip.pypa.io/en/stable/cli/pip/#quiet print("Purging pip cache") @@ -60,6 +60,21 @@ def install(pkg: str, path: str) -> int: "purge", ] ) + + for dep, operator_version in dep_overrides.items(): + print(f"Installing {dep} version {operator_version}") + subprocess.check_call( + [ + sys.executable, + "-m", + "pip", + "-q", + "-q", + "install", + f"{dep}{operator_version}", + ] + ) + print(f"Installing chromadb version {pkg} to {path}") return subprocess.check_call( [ diff --git a/pyproject.toml b/pyproject.toml index 140a519f083..9fb9759dabd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ 'opentelemetry-exporter-otlp-proto-grpc>=1.2.0', 'opentelemetry-instrumentation-fastapi>=0.41b0', 'opentelemetry-sdk>=1.2.0', - 'tokenizers >= 0.13.2', + 'tokenizers >= 0.13.2, <= 0.20.3', 'pypika >= 0.48.9', 'tqdm >= 4.65.0', 'overrides >= 7.3.1', diff --git a/requirements.txt b/requirements.txt index b7b621faf2a..19b079af0eb 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -21,7 +21,7 @@ pypika>=0.48.9 PyYAML>=6.0.0 rich>=10.11.0 tenacity>=8.2.3 -tokenizers>=0.13.2 +tokenizers>=0.13.2,<=0.20.3 tqdm>=4.65.0 typer>=0.9.0 typing_extensions>=4.5.0