Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG] Bound tokenizers, patch CVP test #3211

Merged
merged 5 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions chromadb/test/client/test_http_client_v1_compatability.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import multiprocessing
from unittest.mock import patch

import sys as pysys
from multiprocessing.connection import Connection

from chromadb.config import System
Expand Down Expand Up @@ -41,7 +41,16 @@ def test_http_client_bw_compatibility() -> None:
port = sys.settings.chroma_server_http_port

old_version = "0.5.11" # Module with known v1 client
install_version(old_version)

# Version <3.9 requires bounding tokenizers<=0.20.3
# TOOD(hammadb): This code is duplicated in test_cross_version_persist.py
# for expediency on 11/27/2024 I am copy pasting rather than refactoring
# to DRY. Refactor later.
(major, minor, _) = pysys.version_info[:3]
if major == 3 and minor < 9:
install_version(old_version, {"tokenizers": "<=0.20.3"})
else:
install_version(old_version, {})

ctx = multiprocessing.get_context("spawn")
conn1, conn2 = multiprocessing.Pipe()
Expand Down
12 changes: 10 additions & 2 deletions chromadb/test/property/test_cross_version_persist.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import chromadb.test.property.invariants as invariants
from packaging import version as packaging_version
import re
import sys
import multiprocessing
from chromadb.config import Settings
from chromadb.api.client import Client as ClientCreator
Expand All @@ -38,7 +39,7 @@
version_re = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$")

# Some modules do not work across versions, since we upgrade our support for them, and should be explicitly reimported in the subprocess
VERSIONED_MODULES = ["pydantic", "numpy"]
VERSIONED_MODULES = ["pydantic", "numpy", "tokenizers"]


def versions() -> List[str]:
Expand Down Expand Up @@ -148,7 +149,14 @@ def configurations(versions: List[str]) -> List[Tuple[str, Settings]]:
def version_settings(request) -> Generator[Tuple[str, Settings], None, None]:
configuration = request.param
version = configuration[0]
install_version(version)

# Version <3.9 requires bounding tokenizers<=0.20.3
(major, minor, patch) = sys.version_info[:3]
if major == 3 and minor < 9:
install_version(version, {"tokenizers": "<=0.20.3"})
else:
install_version(version, {})

yield configuration
# Cleanup the installed version
path = get_path_to_version_install(version)
Expand Down
31 changes: 14 additions & 17 deletions chromadb/test/utils/cross_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import tempfile
from types import ModuleType
from typing import List
from typing import Dict, List

base_install_dir = tempfile.gettempdir() + "/persistence_test_chromadb_versions"

Expand Down Expand Up @@ -38,16 +38,16 @@ def get_path_to_version_library(version: str) -> str:
return get_path_to_version_install(version) + "/chromadb/__init__.py"


def install_version(version: str) -> None:
def install_version(version: str, dep_overrides: Dict[str, str]) -> None:
# Check if already installed
version_library = get_path_to_version_library(version)
if os.path.exists(version_library):
return
path = get_path_to_version_install(version)
install(f"chromadb=={version}", path)
install(f"chromadb=={version}", path, dep_overrides)


def install(pkg: str, path: str) -> int:
def install(pkg: str, path: str, dep_overrides: Dict[str, str]) -> int:
# -q -q to suppress pip output to ERROR level
# https://pip.pypa.io/en/stable/cli/pip/#quiet
print("Purging pip cache")
Expand All @@ -60,17 +60,14 @@ def install(pkg: str, path: str) -> int:
"purge",
]
)

command = [sys.executable, "-m", "pip", "-q", "-q", "install", pkg]

for dep, operator_version in dep_overrides.items():
command.append(f"{dep}{operator_version}")

command.append("--no-binary=chroma-hnswlib")
command.append(f"--target={path}")

print(f"Installing chromadb version {pkg} to {path}")
return subprocess.check_call(
[
sys.executable,
"-m",
"pip",
"-q",
"-q",
"install",
pkg,
"--no-binary=chroma-hnswlib",
"--target={}".format(path),
]
)
return subprocess.check_call(command)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies = [
'opentelemetry-exporter-otlp-proto-grpc>=1.2.0',
'opentelemetry-instrumentation-fastapi>=0.41b0',
'opentelemetry-sdk>=1.2.0',
'tokenizers >= 0.13.2',
'tokenizers >= 0.13.2, <= 0.20.3',
'pypika >= 0.48.9',
'tqdm >= 4.65.0',
'overrides >= 7.3.1',
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pypika>=0.48.9
PyYAML>=6.0.0
rich>=10.11.0
tenacity>=8.2.3
tokenizers>=0.13.2
tokenizers>=0.13.2,<=0.20.3
tqdm>=4.65.0
typer>=0.9.0
typing_extensions>=4.5.0
Expand Down
Loading