Skip to content

Commit

Permalink
[BUG] Bound tokenizers, patch CVP test
Browse files Browse the repository at this point in the history
  • Loading branch information
HammadB committed Nov 27, 2024
1 parent f36c386 commit 56a6a16
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 8 deletions.
12 changes: 10 additions & 2 deletions chromadb/test/property/test_cross_version_persist.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import chromadb.test.property.invariants as invariants
from packaging import version as packaging_version
import re
import sys
import multiprocessing
from chromadb.config import Settings
from chromadb.api.client import Client as ClientCreator
Expand All @@ -38,7 +39,7 @@
version_re = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$")

# Some modules do not work across chromadb versions, because the versions of them we support change between releases; they must be explicitly reimported in the subprocess
VERSIONED_MODULES = ["pydantic", "numpy"]
VERSIONED_MODULES = ["pydantic", "numpy", "tokenizers"]


def versions() -> List[str]:
Expand Down Expand Up @@ -148,7 +149,14 @@ def configurations(versions: List[str]) -> List[Tuple[str, Settings]]:
def version_settings(request) -> Generator[Tuple[str, Settings], None, None]:
configuration = request.param
version = configuration[0]
install_version(version)

# On Python < 3.9 the tokenizers dependency must be bounded to <=0.20.3.
# Note: `version` is the chromadb release under test, not the interpreter version.
major, minor = sys.version_info[:2]
dep_overrides = {"tokenizers": "<=0.20.3"} if major == 3 and minor < 9 else {}
install_version(version, dep_overrides)

yield configuration
# Cleanup the installed version
path = get_path_to_version_install(version)
Expand Down
23 changes: 19 additions & 4 deletions chromadb/test/utils/cross_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import tempfile
from types import ModuleType
from typing import List
from typing import Dict, List

base_install_dir = tempfile.gettempdir() + "/persistence_test_chromadb_versions"

Expand Down Expand Up @@ -38,16 +38,16 @@ def get_path_to_version_library(version: str) -> str:
return get_path_to_version_install(version) + "/chromadb/__init__.py"


def install_version(version: str) -> None:
def install_version(version: str, dep_overrides: Dict[str, str]) -> None:
# Check if already installed
version_library = get_path_to_version_library(version)
if os.path.exists(version_library):
return
path = get_path_to_version_install(version)
install(f"chromadb=={version}", path)
install(f"chromadb=={version}", path, dep_overrides)


def install(pkg: str, path: str) -> int:
def install(pkg: str, path: str, dep_overrides: Dict[str, str]) -> int:
# -q -q to suppress pip output to ERROR level
# https://pip.pypa.io/en/stable/cli/pip/#quiet
print("Purging pip cache")
Expand All @@ -60,6 +60,21 @@ def install(pkg: str, path: str) -> int:
"purge",
]
)

# Install each overridden dependency with its version specifier
# (e.g. "tokenizers" + "<=0.20.3") via pip in the current interpreter.
for dep, operator_version in dep_overrides.items():
    print(f"Installing {dep} version {operator_version}")
    subprocess.check_call(
        [
            sys.executable,
            "-m",
            "pip",
            "-q",
            "-q",
            "install",
            # argv is passed as a list, so no shell strips quoting: the
            # requirement must NOT be wrapped in quotes, otherwise pip
            # receives them literally and rejects the specifier.
            f"{dep}{operator_version}",
        ]
    )

print(f"Installing chromadb version {pkg} to {path}")
return subprocess.check_call(
[
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies = [
'opentelemetry-exporter-otlp-proto-grpc>=1.2.0',
'opentelemetry-instrumentation-fastapi>=0.41b0',
'opentelemetry-sdk>=1.2.0',
'tokenizers >= 0.13.2',
'tokenizers >= 0.13.2, <= 0.20.3',
'pypika >= 0.48.9',
'tqdm >= 4.65.0',
'overrides >= 7.3.1',
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pypika>=0.48.9
PyYAML>=6.0.0
rich>=10.11.0
tenacity>=8.2.3
tokenizers>=0.13.2
tokenizers>=0.13.2,<=0.20.3
tqdm>=4.65.0
typer>=0.9.0
typing_extensions>=4.5.0
Expand Down

0 comments on commit 56a6a16

Please sign in to comment.