From 81dfc1a89fcc501e158568204fb48431a18c50c7 Mon Sep 17 00:00:00 2001
From: Ana Caklovic
Date: Mon, 20 May 2024 11:14:45 -0700
Subject: [PATCH 01/12] tests for services

---
 morpheus.code-workspace                       |   6 +-
 .../llm/services/nvfoundation_llm_service.py  |  35 ++--
 morpheus/service/vdb/faiss_vdb_service.py     |  86 +++++-----
 tests/conftest.py                             |  36 ++++-
 .../services/test_nvfoundation_llm_client.py  | 142 ++++++++++++++++
 tests/test_faiss_vector_db_service.py         | 152 ++++++++++++++++++
 6 files changed, 401 insertions(+), 56 deletions(-)
 create mode 100644 tests/llm/services/test_nvfoundation_llm_client.py
 create mode 100644 tests/test_faiss_vector_db_service.py

diff --git a/morpheus.code-workspace b/morpheus.code-workspace
index cbeadce076..f8886b8ac6 100644
--- a/morpheus.code-workspace
+++ b/morpheus.code-workspace
@@ -27,6 +27,7 @@
         "launch": {
             "compounds": [],
             "configurations": [
+
                 {
                     "args": [
                         "--log_level=DEBUG",
@@ -694,7 +695,7 @@
             "tests"
         ],
         "python.testing.pytestEnabled": true,
-        "python.testing.unittestEnabled": false,
+        "python.testing.unittestEnabled": true,
         "rewrap.wrappingColumn": 120,
         "testMate.cpp.test.advancedExecutables": [
             {
@@ -730,6 +731,7 @@
         },
         "yapf.args": [
             "--style=${workspaceFolder}/setup.cfg"
-        ]
+        ],
+        "python.analysis.inlayHints.pytestParameters": true
     }
 }
diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py
index 9be5130bf9..bde0cfd4b8 100644
--- a/morpheus/llm/services/nvfoundation_llm_service.py
+++ b/morpheus/llm/services/nvfoundation_llm_service.py
@@ -42,7 +42,7 @@ class NVFoundationLLMClient(LLMClient):
     `NeMoLLMService.get_client` method.
     Parameters
     ----------
-    parent : NeMoLLMService
+    parent : NVFoundationLLMService
        The parent service for this client.
     model_name : str
        The name of the model to interact with.
@@ -63,7 +63,10 @@ def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model
         self._model_kwargs = model_kwargs
         self._prompt_key = "prompt"

-        self._client = ChatNVIDIA(client=self._parent._nve_client, model=model_name, **model_kwargs)
+        self._client = ChatNVIDIA(api_key=self._parent._api_key,
+                                  base_url=self._parent._base_url,
+                                  model=model_name,
+                                  **model_kwargs)  # type: ignore

     def get_input_names(self) -> list[str]:
         schema = self._client.get_input_schema()
@@ -144,24 +147,30 @@ class NVFoundationLLMService(LLMService):
         variable.
If neither are present `https://api.nvcf.nvidia.com/v2` will be used., by default None """ - def __init__(self, *, api_key: str = None, base_url: str = None) -> None: + def __init__(self, *, api_key: str = None, base_url: str = None, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() - self._api_key = api_key if base_url is None: - self._base_url = os.getenv('NVIDIA_API_BASE', 'https://api.nvcf.nvidia.com/v2') + self._base_url = os.getenv('NVIDIA_API_BASE', "https://api.nvcf.nvidia.com/v2/nvcf") else: self._base_url = base_url - self._nve_client = NVEModel( - nvidia_api_key=self._api_key, - fetch_url_format=f"{self._base_url}/nvcf/pexec/status/", - call_invoke_base=f"{self._base_url}/nvcf/pexec/functions", - func_list_format=f"{self._base_url}/nvcf/functions", - ) # type: ignore + if "NVIDIA_API_KEY" in os.environ: + self._api_key = os.getenv('NVIDIA_API_KEY') + else: + self._api_key = api_key + + self._default_model_kwargs = model_kwargs + + def _merge_model_kwargs(self, model_kwargs: dict) -> dict: + return {**self._default_model_kwargs, **model_kwargs} + + @property + def api_key(self): + return self._api_key def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClient: """ @@ -174,4 +183,6 @@ def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClien Additional keyword arguments to pass to the model when generating text. """ - return NVFoundationLLMClient(self, model_name=model_name, **model_kwargs) + final_model_kwargs = self._merge_model_kwargs(model_kwargs) + + return NVFoundationLLMClient(self, model_name=model_name, **final_model_kwargs) diff --git a/morpheus/service/vdb/faiss_vdb_service.py b/morpheus/service/vdb/faiss_vdb_service.py index 81f63aef5b..30f6d7d766 100644 --- a/morpheus/service/vdb/faiss_vdb_service.py +++ b/morpheus/service/vdb/faiss_vdb_service.py @@ -21,7 +21,10 @@ import typing from functools import wraps +import numpy as np import pandas as pd +from langchain.docstore.document import Document +from langchain_community.vectorstores import FAISS import cudf @@ -81,7 +84,9 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) dict Returns response content as a dictionary. """ - raise NotImplementedError("Insert operation is not supported in FAISS") + self._index.add_embeddings(data) + return {"status": "success"} + #return list_of_ids def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs: dict[str, typing.Any]) -> dict: """ @@ -149,7 +154,7 @@ async def similarity_search(self, k: int = 4, **kwargs: dict[str, typing.Any]) -> list[list[dict]]: """ - Perform a similarity search within the collection. + Perform a similarity search within the FAISS docstore (asimilarity_search_by_vector returns docs most similar to embedding vector asynchronously). Parameters ---------- @@ -211,7 +216,7 @@ def delete_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any] def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: """ - Delete vectors from the collection using expressions. + Delete vectors by giving a list of IDs. Parameters ---------- @@ -225,7 +230,8 @@ def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing dict[str, typing.Any] Returns result of the given keys that are deleted from the collection. 
""" - raise NotImplementedError("Delete operation is not supported in FAISS") + self._index.delete(expr) + return {"status": "success"} def retrieve_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> list[typing.Any]: """ @@ -260,7 +266,9 @@ def count(self, **kwargs: dict[str, typing.Any]) -> int: int Returns number of entities in the collection. """ - raise NotImplementedError("Count operation is not supported in FAISS") + docstore = self._parent._local_dir + count = len(docstore) + return count def drop(self, **kwargs: dict[str, typing.Any]) -> None: """ @@ -311,19 +319,28 @@ def load_resource(self, name: str = "index", **kwargs: dict[str, typing.Any]) -> def has_store_object(self, name: str) -> bool: """ - Check if a collection exists in the Milvus vector database. + Check if specific index file name exists by attempting to load FAISS index, docstore, and index_to_docstore_id from disk with the index file name. Parameters ---------- name : str - Name of the collection to check. + Name of the FAISS index file to check. Returns ------- bool - True if the collection exists, False otherwise. - """ - return self._client.has_collection(collection_name=name) + True if the file exists, False otherwise. + """ + try: + FAISS.load_local(folder_path=self._local_dir, + embeddings=self._embeddings, + index_name=name, + allow_dangerous_deserialization=True) + return True + except Exception as e: + print(f"Failed to load FAISS with the given index file name: {e}") + # Return False if given index file name cannot be loaded + return False def list_store_objects(self, **kwargs: dict[str, typing.Any]) -> list[str]: """ @@ -362,42 +379,28 @@ def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing. ValueError If the provided schema fields configuration is empty. """ - logger.debug("Creating collection: %s, overwrite=%s, kwargs=%s", name, overwrite, kwargs) - - # Preserve original configuration. 
- collection_conf = copy.deepcopy(kwargs) + # can create with: from_embeddings, from_texts, or from_documents - auto_id = collection_conf.get("auto_id", False) - index_conf = collection_conf.get("index_conf", None) - partition_conf = collection_conf.get("partition_conf", None) - - schema_conf = collection_conf.get("schema_conf") - schema_fields_conf = schema_conf.pop("schema_fields") - - if not self.has_store_object(name) or overwrite: - if overwrite and self.has_store_object(name): - self.drop(name) - - if len(schema_fields_conf) == 0: - raise ValueError("Cannot create collection as provided empty schema_fields configuration") + resource = self.load_resource(name) - schema_fields = [FieldSchemaEncoder.from_dict(field_conf) for field_conf in schema_fields_conf] + if "documents" in kwargs: + documents = kwargs["documents"] + return resource._index.from_documents(documents, self._embeddings) - schema = pymilvus.CollectionSchema(fields=schema_fields, **schema_conf) + elif "text_embeddings" in kwargs: + text_embeddings = kwargs["text_embeddings"] + metadatas = kwargs.get("metadatas") + ids = kwargs.get("ids") + return resource._index.from_embeddings(text_embeddings, self._embeddings, metadatas, ids) - self._client.create_collection_with_schema(collection_name=name, - schema=schema, - index_params=index_conf, - auto_id=auto_id, - shards_num=collection_conf.get("shards", 2), - consistency_level=collection_conf.get( - "consistency_level", "Strong")) + elif "texts" in kwargs: + texts = kwargs["texts"] + metadatas = kwargs.get("metadatas") + ids = kwargs.get("ids") + return resource._index.from_texts(texts, self._embeddings, metadatas, ids) - if partition_conf: - timeout = partition_conf.get("timeout", 1.0) - # Iterate over each partition configuration - for part in partition_conf["partitions"]: - self._client.create_partition(collection_name=name, partition_name=part["name"], timeout=timeout) + else: + raise ValueError("You must provide documents, texts, or text_embeddings along with embeddings in kwargs.") def create_from_dataframe(self, name: str, @@ -468,6 +471,7 @@ def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, """ resource = self.load_resource(name) + return resource.insert(data, **kwargs) def insert_dataframe(self, diff --git a/tests/conftest.py b/tests/conftest.py index 0a33fa7891..2fe3f923e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,10 +25,13 @@ import types import typing import warnings +from pathlib import Path from unittest import mock import pytest import requests +from langchain_community.vectorstores import FAISS # added +from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings # added from _utils import import_or_skip from _utils.kafka import _init_pytest_kafka @@ -483,7 +486,7 @@ def seed_fn(seed=42): @pytest.fixture(scope="function") -def chdir_tmpdir(request: pytest.FixtureRequest, tmp_path): +def chdir_tmpdir(request: pytest.FixtureRequest, tmp_path: Path): """ Executes a test in the tmp_path directory """ @@ -1008,6 +1011,25 @@ def milvus_server_uri(tmp_path_factory): yield uri +@pytest.fixture(scope="session") +def faiss_test_dir(): + # Get oath for FAISS directory + tmp_dir_path = os.environ.get('FAISS_DIR') + if tmp_dir_path is None: + raise ValueError("set FAISS_DIR to directory with FAISS DB") + + # Can change embedding model + embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") + tmp_dir = FAISS.load_local(tmp_dir_path, embeddings=embeddings, allow_dangerous_deserialization=True) + yield tmp_dir + + 
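+# The `faiss_test_dir` fixture above assumes FAISS_DIR points at a docstore
+# previously written with FAISS.save_local(). A sketch of that setup (the path
+# and embedding model here are illustrative only):
+#   store = FAISS.from_texts(["for", "the", "test"], NVIDIAEmbeddings(model="nvolveqa_40k"))
+#   store.save_local("/workspace/.tmp/faiss_test_index", "index")
+# Export FAISS_DIR=/workspace/.tmp/faiss_test_index before running pytest.
+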
+@pytest.fixture(scope="session") +def test_embeddings(): + embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") + yield embeddings + + @pytest.fixture(scope="session", name="milvus_data") def milvus_data_fixture(): inital_data = [{"id": i, "embedding": [i / 10.0] * 3, "age": 25 + i} for i in range(10)] @@ -1037,6 +1059,18 @@ def nemollm_fixture(fail_missing: bool): yield import_or_skip("nemollm", reason=skip_reason, fail_missing=fail_missing) +@pytest.fixture(name="nvfoundationllm", scope='session') +def nvfoundationllm_fixture(fail_missing: bool): + """ + Fixture to ensure nvfoundationllm is installed + """ + skip_reason = ( + "Tests for NVFoundation require the langchain-nvidia-ai-endpoints package to be installed, to install this run:\n" + "`conda env update --solver=libmamba -n morpheus " + "--file conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") + yield import_or_skip("langchain_nvidia_ai_endpoints", reason=skip_reason, fail_missing=fail_missing) + + @pytest.fixture(name="openai", scope='session') def openai_fixture(fail_missing: bool): """ diff --git a/tests/llm/services/test_nvfoundation_llm_client.py b/tests/llm/services/test_nvfoundation_llm_client.py new file mode 100644 index 0000000000..72218d2880 --- /dev/null +++ b/tests/llm/services/test_nvfoundation_llm_client.py @@ -0,0 +1,142 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from unittest import mock + +import pytest +from langchain_core.messages import BaseMessage +from langchain_core.messages import ChatMessage +from langchain_core.outputs import ChatGeneration +from langchain_core.outputs import LLMResult + +from morpheus.llm.services.llm_service import LLMClient +from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMClient +from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMService + + +@pytest.mark.usefixtures("restore_environ") +@pytest.mark.parametrize("api_key", [None, "test_api_key"]) +@pytest.mark.parametrize("set_env", [True, False]) +def test_constructor(mock_nvfoundationllm: mock.MagicMock, api_key: str, set_env: bool): + """ + Test that the constructor prefers explicit arguments over environment variables. 
+ """ + env_api_key = "test_env_api_key" + + if set_env: + os.environ["NVIDIA_API_KEY"] = env_api_key + + service = NVFoundationLLMService(api_key=api_key) + + expected_api_key = api_key if "NVIDIA_API_KEY" not in os.environ else env_api_key + + assert service.api_key == expected_api_key + + +def test_get_client(): + service = NVFoundationLLMService(api_key="test_api_key") + client = service.get_client(model_name="test_model") + + assert isinstance(client, NVFoundationLLMClient) + + +def test_model_kwargs(): + service = NVFoundationLLMService(arg1="default_value1", arg2="default_value2") + + client = service.get_client(model_name="model_name", arg2="value2") + + assert client.model_kwargs["arg1"] == "default_value1" + assert client.model_kwargs["arg2"] == "value2" + + +def test_get_input_names(): + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model", additional_arg="test_arg") + + assert client.get_input_names() == ["prompt"] + + +def test_generate(): + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.generate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return LLMResult(generations=[[ + ChatGeneration(message=ChatMessage(content=x.text, role="assistant")) for x in kwargs["prompts"] + ]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + assert client.generate(prompt="test_prompt") == "test_prompt" + + +def test_generate_batch(): + + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.generate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] + for x in kwargs["prompts"]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + + assert client.generate_batch({'prompt': ["prompt1", "prompt2"]}) == ["prompt1", "prompt2"] + + +async def test_generate_async(): + + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] + for x in kwargs["prompts"]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + + assert await client.generate_async(prompt="test_prompt") == "test_prompt" + + +async def test_generate_batch_async(): + + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] + for x in kwargs["prompts"]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + + assert await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}) + + +async def test_generate_batch_async_error(): + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return 
LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] + for x in kwargs["prompts"]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + + with pytest.raises(RuntimeError, match="unittest"): + await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}) diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py new file mode 100644 index 0000000000..9217d5f7f8 --- /dev/null +++ b/tests/test_faiss_vector_db_service.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pathlib import Path +from typing import Union + +import numpy as np +import pytest +from langchain_community.docstore.in_memory import InMemoryDocstore +from langchain_community.vectorstores import FAISS +from langchain_core.documents import Document +from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings + +import cudf + +from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBResourceService +from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBService + +# create FAISS docstore for testing +texts = ["for", "the", "test"] +embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") +ids = ["a", "b", "c"] +create_store = FAISS.from_texts(texts, embeddings, ids=ids) +index_name = "index" +tmp_dir_path = "/workspace/.tmp/faiss_test_index" +create_store.save_local(tmp_dir_path, index_name) +faiss_service = FaissVectorDBService(local_dir=tmp_dir_path, embeddings=embeddings) + + +@pytest.fixture(scope="module", name="faiss_service") +def faiss_service_fixture(faiss_test_dir: str, test_embeddings: list): + # Fixture for FAISS service; can edit FAISS docstore instantiated outside fixture if need to change + # embedding model, et. 
+ service = FaissVectorDBService(local_dir=faiss_test_dir, embeddings=test_embeddings) + yield service + + +def test_load_resource(faiss_service: FaissVectorDBService): + resource = faiss_service.load_resource(name="index") + assert isinstance(resource, FaissVectorDBResourceService) + assert resource._name == "index" + + +def test_count(faiss_service: FaissVectorDBService): + collection = "index" + count = faiss_service.count(collection) + assert count == len(faiss_service._local_dir) + + +def test_insert(): + # Test for inserting embeddings (not docs, texts) into docsotre + vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") + test_data = list(iter([("hi", vector)])) + docstore_name = "index" + response = faiss_service.insert(name=docstore_name, data=test_data) + assert response == {"status": "success"} + + +def test_delete(): + # specify name of docstore and ID to delete + docstore_name = "index" + delete_id = "a" + response_delete = faiss_service.delete(name=docstore_name, expr=delete_id) + assert response_delete == {"status": "success"} + + +async def test_similarity_search(): + index_to_id = create_store.index_to_docstore_id + in_mem_docstore = InMemoryDocstore({ + index_to_id[0]: Document(page_content="for"), + index_to_id[1]: Document(page_content="the"), + index_to_id[2]: Document(page_content="test"), + }) + + assert create_store.docstore.__dict__ == in_mem_docstore.__dict__ + + query_vec = await embeddings.aembed_query(text="for") + output = await create_store.asimilarity_search_by_vector(query_vec, k=1) + + assert output == [Document(page_content="for")] + + +def test_has_store_object(): + # create FAISS docstore to test with + object_store = FAISS.from_texts(texts, embeddings, ids=ids) + object_name = "store_object_index" + object_store.save_local(tmp_dir_path, object_name) + + # attempt to load docstore with given index name + load_attempt = faiss_service.has_store_object(object_name) + assert load_attempt is True + + # attempt to load docstore with wrong index name + object_name = "wrong_index_name" + load_attempt = faiss_service.has_store_object(object_name) + assert load_attempt is False + + +def test_create(): + # Test creating docstore from embeddings + vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") + test_embedding = list(iter([("hi", vector)])) + docstore_name = "index" + embeddings_docstore = faiss_service.create(name=docstore_name, text_embeddings=test_embedding) + + # save created docstore + index_name_embeddings = "embeddings_index" + embeddings_docstore.save_local(tmp_dir_path, index_name_embeddings) + + # attempt to load created docstore + load_attempt = faiss_service.has_store_object(index_name_embeddings) + + assert load_attempt is True + + # Test creating docstore from texts + test_texts = ["for", "the", "test"] + texts_docstore = faiss_service.create(name=docstore_name, texts=test_texts) + + # save created docstore + index_name_texts = "texts_index" + texts_docstore.save_local(tmp_dir_path, index_name_texts) + + # attempt to load created docstore + load_attempt = faiss_service.has_store_object(index_name_texts) + + assert load_attempt is True + + # Test creating docstore from documents + test_documents = [Document(page_content="This is for the test.")] + docs_docstore = faiss_service.create(name=docstore_name, documents=test_documents) + + # save created docstore + index_name_docs = "docs_index" + docs_docstore.save_local(tmp_dir_path, index_name_docs) + + # attempt to load created docstore + load_attempt = 
faiss_service.has_store_object(index_name_docs) + + assert load_attempt is True From 1f1db6cacbd3102940e60f6ed7f5ddfadac23bac Mon Sep 17 00:00:00 2001 From: Ana Caklovic Date: Wed, 22 May 2024 19:55:09 +0000 Subject: [PATCH 02/12] faiss fixes --- .../llm/services/nvfoundation_llm_service.py | 27 +++++- morpheus/service/vdb/faiss_vdb_service.py | 83 ++++--------------- tests/_utils/faiss.py | 28 +++++++ tests/conftest.py | 20 ++--- .../services/test_nvfoundation_llm_client.py | 14 ++-- tests/test_faiss_vector_db_service.py | 66 ++++++++------- 6 files changed, 119 insertions(+), 119 deletions(-) create mode 100644 tests/_utils/faiss.py diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index d808c6f43d..63b8a10643 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -14,6 +14,7 @@ import logging import os +import typing from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.llm_service import LLMService @@ -22,7 +23,8 @@ IMPORT_EXCEPTION = None IMPORT_ERROR_MESSAGE = ( - "The `langchain-nvidia-ai-endpoints` package was not found. Install it and other additional dependencies by running the following command:\n" + "The `langchain-nvidia-ai-endpoints` package was not found. Install it and other additional dependencies by " + "running the following command:" "`conda env update --solver=libmamba -n morpheus " "--file morpheus/conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") @@ -77,8 +79,6 @@ def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model self._client = ChatNVIDIA(**{**chat_kwargs, **model_kwargs}) # type: ignore def get_input_names(self) -> list[str]: - schema = self._client.get_input_schema() - return [self._prompt_key] def generate(self, **input_dict) -> str: @@ -118,6 +118,8 @@ def generate_batch(self, inputs: dict[str, list], **kwargs) -> list[str]: ---------- inputs : dict Inputs containing prompt data. + **kwargs : dict + Additional keyword arguments for generate batch. """ prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] @@ -127,13 +129,30 @@ def generate_batch(self, inputs: dict[str, list], **kwargs) -> list[str]: return [g[0].text for g in responses.generations] - async def generate_batch_async(self, inputs: dict[str, list], **kwargs) -> list[str]: + @typing.overload + async def generate_batch_async(self, + inputs: dict[str, list], + return_exceptions: typing.Literal[True] = True) -> list[str | BaseException]: + ... + + @typing.overload + async def generate_batch_async(self, + inputs: dict[str, list], + return_exceptions: typing.Literal[False] = False) -> list[str]: + ... + + async def generate_batch_async(self, + inputs: dict[str, list], + return_exceptions=False) -> list[str] | list[str | BaseException]: """ Issue an asynchronous request to generate a list of responses based on a list of prompts. + Parameters ---------- inputs : dict Inputs containing prompt data. + return_exceptions : bool + Whether to return exceptions in the output list or raise them immediately. """ prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] diff --git a/morpheus/service/vdb/faiss_vdb_service.py b/morpheus/service/vdb/faiss_vdb_service.py index 30f6d7d766..a23d254819 100644 --- a/morpheus/service/vdb/faiss_vdb_service.py +++ b/morpheus/service/vdb/faiss_vdb_service.py @@ -13,18 +13,11 @@ # limitations under the License. 
 import asyncio
-import copy
-import json
 import logging
-import threading
 import time
 import typing
-from functools import wraps

-import numpy as np
 import pandas as pd
-from langchain.docstore.document import Document
-from langchain_community.vectorstores import FAISS

 import cudf

@@ -34,7 +27,7 @@
 logger = logging.getLogger(__name__)

 IMPORT_EXCEPTION = None
-IMPORT_ERROR_MESSAGE = "MilvusVectorDBResourceService requires the milvus and pymilvus packages to be installed."
+IMPORT_ERROR_MESSAGE = "FaissVectorDBResourceService requires the faiss and langchain packages to be installed."

 try:
     from langchain.vectorstores.faiss import FAISS
@@ -86,7 +79,6 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any])
         """
         self._index.add_embeddings(data)
         return {"status": "success"}
-        #return list_of_ids

     def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs: dict[str, typing.Any]) -> dict:
         """
@@ -154,7 +146,8 @@ async def similarity_search(self,
                                 k: int = 4,
                                 **kwargs: dict[str, typing.Any]) -> list[list[dict]]:
         """
-        Perform a similarity search within the FAISS docstore (asimilarity_search_by_vector returns docs most similar to embedding vector asynchronously).
+        Perform a similarity search within the FAISS docstore (asimilarity_search_by_vector
+        returns docs most similar to embedding vector asynchronously).

         Parameters
         ----------
@@ -305,7 +298,7 @@ class FaissVectorDBService(VectorDBService):
     _cleanup_interval = 600  # 10mins
     _last_cleanup_time = time.time()

-    def __init__(self, local_dir: str, embeddings, **kwargs: dict[str, typing.Any]):
+    def __init__(self, local_dir: str, embeddings):

         if IMPORT_EXCEPTION is not None:
             raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION
@@ -319,7 +312,8 @@ def load_resource(self, name: str = "index", **kwargs: dict[str, typing.Any]) ->

     def has_store_object(self, name: str) -> bool:
         """
-        Check if specific index file name exists by attempting to load FAISS index, docstore, and index_to_docstore_id from disk with the index file name.
+        Check if specific index file name exists by attempting to load FAISS index, docstore,
+        and index_to_docstore_id from disk with the index file name.

         Parameters
         ----------
@@ -351,13 +345,7 @@ def list_store_objects(self, **kwargs: dict[str, typing.Any]) -> list[str]:
         list[str]
             A list of collection names.
         """
-        return self._client.list_collections(**kwargs)
-
-    def _create_schema_field(self, field_conf: dict) -> "pymilvus.FieldSchema":
-
-        field_schema = pymilvus.FieldSchema.construct_from_dict(field_conf)
-
-        return field_schema
+        raise NotImplementedError("List operation is not supported in FAISS")

     def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing.Any]):
         """
@@ -387,20 +375,19 @@
             documents = kwargs["documents"]
             return resource._index.from_documents(documents, self._embeddings)

-        elif "text_embeddings" in kwargs:
+        if "text_embeddings" in kwargs:
             text_embeddings = kwargs["text_embeddings"]
             metadatas = kwargs.get("metadatas")
             ids = kwargs.get("ids")
             return resource._index.from_embeddings(text_embeddings, self._embeddings, metadatas, ids)

-        elif "texts" in kwargs:
+        if "texts" in kwargs:
             texts = kwargs["texts"]
             metadatas = kwargs.get("metadatas")
             ids = kwargs.get("ids")
             return resource._index.from_texts(texts, self._embeddings, metadatas, ids)

-        else:
-            raise ValueError("You must provide documents, texts, or text_embeddings along with embeddings in kwargs.")
+        raise ValueError("You must provide documents, texts, or text_embeddings along with embeddings in kwargs.")

     def create_from_dataframe(self,
                               name: str,
@@ -422,28 +409,7 @@ def create_from_dataframe(self,
             Extra keyword arguments specific to the vector database implementation.
         """

-        fields = self._build_schema_conf(df=df)
-
-        create_kwargs = {
-            "schema_conf": {
-                "description": "Auto generated schema from DataFrame in Morpheus",
-                "schema_fields": fields,
-            }
-        }
-
-        if (kwargs.get("index_field", None) is not None):
-            # Check to make sure the column name exists in the fields
-            create_kwargs["index_conf"] = {
-                "field_name": kwargs.get("index_field"),  # Default index type
-                "metric_type": "L2",
-                "index_type": "HNSW",
-                "params": {
-                    "M": 8,
-                    "efConstruction": 64,
-                },
-            }
-
-        self.create(name=name, overwrite=overwrite, **create_kwargs)
+        raise NotImplementedError("Create from dataframe operation is not supported in FAISS")

     def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str,
                                                                               typing.Any]) -> dict[str, typing.Any]:
@@ -703,28 +669,7 @@ def drop(self, name: str, **kwargs: dict[str, typing.Any]) -> None:
             If mandatory arguments are missing or if the provided 'collection' value is invalid.
         """

-        logger.debug("Dropping collection: %s, kwargs=%s", name, kwargs)
-
-        if self.has_store_object(name):
-            resource = kwargs.get("resource", "collection")
-            if resource == "collection":
-                self._client.drop_collection(collection_name=name)
-            elif resource == "partition":
-                if "partition_name" not in kwargs:
-                    raise ValueError("Mandatory argument 'partition_name' is required when resource='partition'")
-                partition_name = kwargs["partition_name"]
-                if self._client.has_partition(collection_name=name, partition_name=partition_name):
-                    # Collection need to be released before dropping the partition.
-                    self._client.release_collection(collection_name=name)
-                    self._client.drop_partition(collection_name=name, partition_name=partition_name)
-            elif resource == "index":
-                if "field_name" in kwargs and "index_name" in kwargs:
-                    self._client.drop_index(collection_name=name,
-                                            field_name=kwargs["field_name"],
-                                            index_name=kwargs["index_name"])
-                else:
-                    raise ValueError(
-                        "Mandatory arguments 'field_name' and 'index_name' are required when resource='index'")
+        raise NotImplementedError("Drop operation is not supported in FAISS")

     def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict:
         """
@@ -757,7 +702,7 @@ def release_resource(self, name: str) -> None:
             Name of the collection to release.
         """

-        self._client.release_collection(collection_name=name)
+        raise NotImplementedError("Release operation is not supported in FAISS")

     def close(self) -> None:
         """

        This method disconnects from the Milvus vector database by removing the connection.
""" - self._client.close() + raise NotImplementedError("Describe operation is not supported in FAISS") diff --git a/tests/_utils/faiss.py b/tests/_utils/faiss.py new file mode 100644 index 0000000000..6e8d1d3bbf --- /dev/null +++ b/tests/_utils/faiss.py @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities for testing Morpheus with FAISS""" +from typing import List + + +class FakeEmbedder: + + def embed_documents(self, data: list) -> List[List[float]]: + return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] + + def embed_query(self, data: str) -> List[float]: + return [float(1.0)] * 1023 + [float(0.0)] + + async def aembed_query(self, data: str) -> List[float]: + return [float(1.0)] * 1023 + [float(0.0)] diff --git a/tests/conftest.py b/tests/conftest.py index 2fe3f923e8..b4ac6ca95c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1011,22 +1011,22 @@ def milvus_server_uri(tmp_path_factory): yield uri +from _utils.faiss import FakeEmbedder + + @pytest.fixture(scope="session") def faiss_test_dir(): - # Get oath for FAISS directory + # Get path for FAISS directory tmp_dir_path = os.environ.get('FAISS_DIR') if tmp_dir_path is None: raise ValueError("set FAISS_DIR to directory with FAISS DB") - - # Can change embedding model - embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") - tmp_dir = FAISS.load_local(tmp_dir_path, embeddings=embeddings, allow_dangerous_deserialization=True) - yield tmp_dir + yield tmp_dir_path @pytest.fixture(scope="session") -def test_embeddings(): - embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") +def faiss_test_embeddings(): + #embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") + embeddings = FakeEmbedder() yield embeddings @@ -1065,8 +1065,8 @@ def nvfoundationllm_fixture(fail_missing: bool): Fixture to ensure nvfoundationllm is installed """ skip_reason = ( - "Tests for NVFoundation require the langchain-nvidia-ai-endpoints package to be installed, to install this run:\n" - "`conda env update --solver=libmamba -n morpheus " + "Tests for NVFoundation require the langchain-nvidia-ai-endpoints package to be installed, to install this " + "run:\n `conda env update --solver=libmamba -n morpheus " "--file conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") yield import_or_skip("langchain_nvidia_ai_endpoints", reason=skip_reason, fail_missing=fail_missing) diff --git a/tests/llm/services/test_nvfoundation_llm_client.py b/tests/llm/services/test_nvfoundation_llm_client.py index 72218d2880..dc02c1836a 100644 --- a/tests/llm/services/test_nvfoundation_llm_client.py +++ b/tests/llm/services/test_nvfoundation_llm_client.py @@ -17,12 +17,10 @@ from unittest import mock import pytest -from langchain_core.messages import BaseMessage from langchain_core.messages import ChatMessage from langchain_core.outputs import ChatGeneration from langchain_core.outputs import LLMResult -from 
morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMClient from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMService @@ -30,7 +28,7 @@ @pytest.mark.usefixtures("restore_environ") @pytest.mark.parametrize("api_key", [None, "test_api_key"]) @pytest.mark.parametrize("set_env", [True, False]) -def test_constructor(mock_nvfoundationllm: mock.MagicMock, api_key: str, set_env: bool): +def test_constructor(api_key: str, set_env: bool): """ Test that the constructor prefers explicit arguments over environment variables. """ @@ -71,7 +69,7 @@ def test_get_input_names(): def test_generate(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.generate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ ChatGeneration(message=ChatMessage(content=x.text, role="assistant")) for x in kwargs["prompts"] ]]) @@ -86,7 +84,7 @@ def test_generate_batch(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.generate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] for x in kwargs["prompts"]]) @@ -101,7 +99,7 @@ async def test_generate_async(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] for x in kwargs["prompts"]]) @@ -116,7 +114,7 @@ async def test_generate_batch_async(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] for x in kwargs["prompts"]]) @@ -130,7 +128,7 @@ def mock_generation_side_effect(*args, **kwargs): async def test_generate_batch_async_error(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] for x in kwargs["prompts"]]) diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py index 9217d5f7f8..f140ecfec0 100644 --- a/tests/test_faiss_vector_db_service.py +++ b/tests/test_faiss_vector_db_service.py @@ -14,62 +14,72 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from pathlib import Path -from typing import Union - -import numpy as np import pytest from langchain_community.docstore.in_memory import InMemoryDocstore from langchain_community.vectorstores import FAISS from langchain_core.documents import Document from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings -import cudf - +from _utils.faiss import FakeEmbedder from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBResourceService from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBService # create FAISS docstore for testing texts = ["for", "the", "test"] -embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") +embeddings = FakeEmbedder() ids = ["a", "b", "c"] create_store = FAISS.from_texts(texts, embeddings, ids=ids) -index_name = "index" -tmp_dir_path = "/workspace/.tmp/faiss_test_index" -create_store.save_local(tmp_dir_path, index_name) -faiss_service = FaissVectorDBService(local_dir=tmp_dir_path, embeddings=embeddings) +INDEX_NAME = "index" +TMP_DIR_PATH = "/workspace/.tmp/faiss_test_index" +create_store.save_local(TMP_DIR_PATH, INDEX_NAME) + + +def test_dir_path(): + import os + from _utils.faiss import FakeEmbedder -@pytest.fixture(scope="module", name="faiss_service") -def faiss_service_fixture(faiss_test_dir: str, test_embeddings: list): + tmp_dir_path = os.environ.get('FAISS_DIR') + if tmp_dir_path is None: + raise ValueError("set FAISS_DIR to directory with FAISS DB") + + # Can change embedding model + embeddings = FakeEmbedder() + tmp_dir = FAISS.load_local(tmp_dir_path, embeddings=embeddings, allow_dangerous_deserialization=True) + return tmp_dir + + +# scope = function +@pytest.fixture(scope="function", name="faiss_service") +def faiss_service_fixture(faiss_test_dir: str, faiss_test_embeddings: list): # Fixture for FAISS service; can edit FAISS docstore instantiated outside fixture if need to change # embedding model, et. 
- service = FaissVectorDBService(local_dir=faiss_test_dir, embeddings=test_embeddings) + service = FaissVectorDBService(local_dir=faiss_test_dir, embeddings=faiss_test_embeddings) yield service def test_load_resource(faiss_service: FaissVectorDBService): - resource = faiss_service.load_resource(name="index") + resource = faiss_service.load_resource() assert isinstance(resource, FaissVectorDBResourceService) assert resource._name == "index" def test_count(faiss_service: FaissVectorDBService): - collection = "index" - count = faiss_service.count(collection) + docstore = "index" + count = faiss_service.count(docstore) assert count == len(faiss_service._local_dir) -def test_insert(): - # Test for inserting embeddings (not docs, texts) into docsotre - vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") +def test_insert(faiss_service: FaissVectorDBService): + # Test for inserting embeddings (not docs, texts) into docstore + vector = FakeEmbedder().embed_query(data="hi") test_data = list(iter([("hi", vector)])) docstore_name = "index" response = faiss_service.insert(name=docstore_name, data=test_data) assert response == {"status": "success"} -def test_delete(): +def test_delete(faiss_service: FaissVectorDBService): # specify name of docstore and ID to delete docstore_name = "index" delete_id = "a" @@ -87,17 +97,17 @@ async def test_similarity_search(): assert create_store.docstore.__dict__ == in_mem_docstore.__dict__ - query_vec = await embeddings.aembed_query(text="for") + query_vec = await embeddings.aembed_query("for") output = await create_store.asimilarity_search_by_vector(query_vec, k=1) assert output == [Document(page_content="for")] -def test_has_store_object(): +def test_has_store_object(faiss_service: FaissVectorDBService): # create FAISS docstore to test with object_store = FAISS.from_texts(texts, embeddings, ids=ids) object_name = "store_object_index" - object_store.save_local(tmp_dir_path, object_name) + object_store.save_local(TMP_DIR_PATH, object_name) # attempt to load docstore with given index name load_attempt = faiss_service.has_store_object(object_name) @@ -109,7 +119,7 @@ def test_has_store_object(): assert load_attempt is False -def test_create(): +def test_create(faiss_service: FaissVectorDBService): # Test creating docstore from embeddings vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") test_embedding = list(iter([("hi", vector)])) @@ -118,7 +128,7 @@ def test_create(): # save created docstore index_name_embeddings = "embeddings_index" - embeddings_docstore.save_local(tmp_dir_path, index_name_embeddings) + embeddings_docstore.save_local(TMP_DIR_PATH, index_name_embeddings) # attempt to load created docstore load_attempt = faiss_service.has_store_object(index_name_embeddings) @@ -131,7 +141,7 @@ def test_create(): # save created docstore index_name_texts = "texts_index" - texts_docstore.save_local(tmp_dir_path, index_name_texts) + texts_docstore.save_local(TMP_DIR_PATH, index_name_texts) # attempt to load created docstore load_attempt = faiss_service.has_store_object(index_name_texts) @@ -144,7 +154,7 @@ def test_create(): # save created docstore index_name_docs = "docs_index" - docs_docstore.save_local(tmp_dir_path, index_name_docs) + docs_docstore.save_local(TMP_DIR_PATH, index_name_docs) # attempt to load created docstore load_attempt = faiss_service.has_store_object(index_name_docs) From 53df8a05558eafcdb486601b3f781b2b0b3ce44b Mon Sep 17 00:00:00 2001 From: Ana Caklovic Date: Fri, 24 May 2024 04:35:44 +0000 Subject: [PATCH 03/12] 
nvfoundation fixes --- dependencies.yaml | 3 ++ .../llm/services/nvfoundation_llm_service.py | 49 +++++++++++++++---- tests/_utils/faiss.py | 15 ++++-- tests/conftest.py | 7 +-- tests/test_faiss_vector_db_service.py | 19 +------ 5 files changed, 56 insertions(+), 37 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 616c1db3de..c8918d45fe 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -275,6 +275,9 @@ dependencies: - databricks-connect - milvus==2.3.5 # update to match pymilvus when available - pymilvus==2.3.6 + - langchain-nvidia-ai-endpoints + - langchain-community + - faiss-gpu test_python_morpheus: common: diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index 63b8a10643..a4ddd99724 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -81,6 +81,10 @@ def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model def get_input_names(self) -> list[str]: return [self._prompt_key] + @property + def model_kwargs(self): + return self._model_kwargs + def generate(self, **input_dict) -> str: """ Issue a request to generate a response based on a given prompt. @@ -111,23 +115,36 @@ async def generate_async(self, **input_dict) -> str: return (await self.generate_batch_async(inputs=inputs, **input_dict))[0] - def generate_batch(self, inputs: dict[str, list], **kwargs) -> list[str]: + def generate_batch(self, + inputs: dict[str, list], + return_exceptions: typing.Literal[True] = True, + **kwargs) -> list[str] | list[str | BaseException]: """ Issue a request to generate a list of responses based on a list of prompts. Parameters ---------- inputs : dict Inputs containing prompt data. + return_exceptions : bool + Whether to return exceptions in the output list or raise them immediately. **kwargs : dict Additional keyword arguments for generate batch. """ - prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] + prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] final_kwargs = {**self._model_kwargs, **kwargs} - responses = self._client.generate_prompt(prompts=prompts, **final_kwargs) # type: ignore + responses = [] + try: + generated_responses = self._client.generate_prompt(prompts=prompts, **final_kwargs) # type: ignore + responses = [g[0].text for g in generated_responses.generations] + except Exception as e: + if return_exceptions: + responses.append(e) + else: + raise e - return [g[0].text for g in responses.generations] + return responses @typing.overload async def generate_batch_async(self, @@ -143,7 +160,8 @@ async def generate_batch_async(self, async def generate_batch_async(self, inputs: dict[str, list], - return_exceptions=False) -> list[str] | list[str | BaseException]: + return_exceptions=False, + **kwargs) -> list[str] | list[str | BaseException]: """ Issue an asynchronous request to generate a list of responses based on a list of prompts. @@ -153,15 +171,28 @@ async def generate_batch_async(self, Inputs containing prompt data. return_exceptions : bool Whether to return exceptions in the output list or raise them immediately. + **kwargs: dict + Additional keyword arguments for generate batch async. 
""" - prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] + # prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] - final_kwargs = {**self._model_kwargs, **kwargs} + # final_kwargs = {**self._model_kwargs, **kwargs} + + # responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore - responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore + # return [g[0].text for g in responses.generations] + + prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] + final_kwargs = {**self._model_kwargs, **kwargs} - return [g[0].text for g in responses.generations] + try: + responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore + return [g[0].text for g in responses.generations] + except Exception as e: + if return_exceptions: + return [e] + raise e class NVFoundationLLMService(LLMService): diff --git a/tests/_utils/faiss.py b/tests/_utils/faiss.py index 6e8d1d3bbf..d8060477dc 100644 --- a/tests/_utils/faiss.py +++ b/tests/_utils/faiss.py @@ -18,11 +18,18 @@ class FakeEmbedder: + def embed_query(self, data: str) -> List[float]: + # setting data to arbitrary float since constant value will always be returned + data = 0.0 + return [float(1.0)] * 1023 + [float(0.0) * data] + def embed_documents(self, data: list) -> List[List[float]]: return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] - def embed_query(self, data: str) -> List[float]: - return [float(1.0)] * 1023 + [float(0.0)] - async def aembed_query(self, data: str) -> List[float]: - return [float(1.0)] * 1023 + [float(0.0)] + # setting data to arbitrary float since constant value will always be returned + data = 0.0 + return [float(1.0)] * 1023 + [float(0.0) * data] + + async def aembed_documents(self, data: list) -> List[List[float]]: + return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] diff --git a/tests/conftest.py b/tests/conftest.py index b4ac6ca95c..4de60687df 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,10 +30,9 @@ import pytest import requests -from langchain_community.vectorstores import FAISS # added -from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings # added from _utils import import_or_skip +from _utils.faiss import FakeEmbedder from _utils.kafka import _init_pytest_kafka from _utils.kafka import kafka_bootstrap_servers_fixture # noqa: F401 pylint:disable=unused-import from _utils.kafka import kafka_consumer_fixture # noqa: F401 pylint:disable=unused-import @@ -1011,9 +1010,6 @@ def milvus_server_uri(tmp_path_factory): yield uri -from _utils.faiss import FakeEmbedder - - @pytest.fixture(scope="session") def faiss_test_dir(): # Get path for FAISS directory @@ -1025,7 +1021,6 @@ def faiss_test_dir(): @pytest.fixture(scope="session") def faiss_test_embeddings(): - #embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") embeddings = FakeEmbedder() yield embeddings diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py index f140ecfec0..4f229cc1de 100644 --- a/tests/test_faiss_vector_db_service.py +++ b/tests/test_faiss_vector_db_service.py @@ -18,7 +18,6 @@ from langchain_community.docstore.in_memory import InMemoryDocstore from langchain_community.vectorstores import FAISS from langchain_core.documents import Document -from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings from _utils.faiss import FakeEmbedder from morpheus.service.vdb.faiss_vdb_service import 
FaissVectorDBResourceService @@ -34,22 +33,6 @@ create_store.save_local(TMP_DIR_PATH, INDEX_NAME) -def test_dir_path(): - import os - - from _utils.faiss import FakeEmbedder - - tmp_dir_path = os.environ.get('FAISS_DIR') - if tmp_dir_path is None: - raise ValueError("set FAISS_DIR to directory with FAISS DB") - - # Can change embedding model - embeddings = FakeEmbedder() - tmp_dir = FAISS.load_local(tmp_dir_path, embeddings=embeddings, allow_dangerous_deserialization=True) - return tmp_dir - - -# scope = function @pytest.fixture(scope="function", name="faiss_service") def faiss_service_fixture(faiss_test_dir: str, faiss_test_embeddings: list): # Fixture for FAISS service; can edit FAISS docstore instantiated outside fixture if need to change @@ -121,7 +104,7 @@ def test_has_store_object(faiss_service: FaissVectorDBService): def test_create(faiss_service: FaissVectorDBService): # Test creating docstore from embeddings - vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") + vector = FakeEmbedder().embed_query(data="hi") test_embedding = list(iter([("hi", vector)])) docstore_name = "index" embeddings_docstore = faiss_service.create(name=docstore_name, text_embeddings=test_embedding) From b7ef6546cc54c6cea0e8f53de2bc2cb509658910 Mon Sep 17 00:00:00 2001 From: Ana Caklovic Date: Fri, 24 May 2024 16:13:49 +0000 Subject: [PATCH 04/12] final fixes --- .../src/stages/inference_client_stage.cpp | 1 + .../llm/services/nvfoundation_llm_service.py | 22 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/morpheus/_lib/src/stages/inference_client_stage.cpp b/morpheus/_lib/src/stages/inference_client_stage.cpp index 069ccd557e..1366d5691c 100644 --- a/morpheus/_lib/src/stages/inference_client_stage.cpp +++ b/morpheus/_lib/src/stages/inference_client_stage.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index a4ddd99724..d151961c19 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -160,7 +160,7 @@ async def generate_batch_async(self, async def generate_batch_async(self, inputs: dict[str, list], - return_exceptions=False, + return_exceptions: typing.Literal[True] = True, **kwargs) -> list[str] | list[str | BaseException]: """ Issue an asynchronous request to generate a list of responses based on a list of prompts. @@ -175,24 +175,20 @@ async def generate_batch_async(self, Additional keyword arguments for generate batch async. 
""" - # prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] - - # final_kwargs = {**self._model_kwargs, **kwargs} - - # responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore - - # return [g[0].text for g in responses.generations] - prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] final_kwargs = {**self._model_kwargs, **kwargs} + responses = [] try: - responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore - return [g[0].text for g in responses.generations] + generated_responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore + responses = [g[0].text for g in generated_responses.generations] except Exception as e: if return_exceptions: - return [e] - raise e + responses.append(e) + else: + raise e + + return responses class NVFoundationLLMService(LLMService): From 889de2b04f03322fb630f3289d3a8249f5eb1696 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Fri, 24 May 2024 14:04:23 -0400 Subject: [PATCH 05/12] Making the API key work the same as base URL --- morpheus/llm/services/nvfoundation_llm_service.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index b38d468ed7..0915975aba 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -214,15 +214,14 @@ def __init__(self, *, api_key: str = None, base_url: str = None, **model_kwargs) super().__init__() - # Set the base url from the environment if not provided. Default to None to allow the client to set the url. if base_url is None: - self._base_url = os.getenv('NVIDIA_API_BASE', "https://api.nvcf.nvidia.com/v2/nvcf") + self._base_url = os.getenv('NVIDIA_API_BASE', None) else: self._base_url = base_url - if "NVIDIA_API_KEY" in os.environ: - self._api_key = os.getenv('NVIDIA_API_KEY') + if api_key is None: + self._api_key = os.getenv('NVIDIA_API_KEY', None) else: self._api_key = api_key From 5d634a8629c53713815add21a0498753f6a9166f Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Thu, 6 Jun 2024 11:12:19 -0700 Subject: [PATCH 06/12] Updating all LLM services to be the same --- morpheus/llm/services/nemo_llm_service.py | 48 ++++---- .../llm/services/nvfoundation_llm_service.py | 48 ++++---- morpheus/llm/services/openai_chat_service.py | 110 +++++++++--------- tests/test_faiss_vector_db_service.py | 2 +- 4 files changed, 107 insertions(+), 101 deletions(-) diff --git a/morpheus/llm/services/nemo_llm_service.py b/morpheus/llm/services/nemo_llm_service.py index d744948159..364b3928bc 100644 --- a/morpheus/llm/services/nemo_llm_service.py +++ b/morpheus/llm/services/nemo_llm_service.py @@ -188,6 +188,22 @@ async def generate_batch_async(self, class NeMoLLMService(LLMService): """ A service for interacting with NeMo LLM models, this class should be used to create a client for a specific model. + + Parameters + ---------- + api_key : str, optional + The API key for the LLM service, by default None. If `None` the API key will be read from the `NGC_API_KEY` + environment variable. If neither are present an error will be raised., by default None + org_id : str, optional + The organization ID for the LLM service, by default None. If `None` the organization ID will be read from + the `NGC_ORG_ID` environment variable. 
This value is only required if the account associated with the + `api_key` is a member of multiple NGC organizations, by default None + base_url : str, optional + The api host url, by default None. If `None` the url will be read from the `NGC_BASE_URL` environment + variable. If neither are present the NeMo default will be used, by default None + retry_count : int, optional + The number of times to retry a request before raising an exception, by default 5 + """ class APIKey(EnvConfigValue): @@ -198,7 +214,7 @@ class OrgId(EnvConfigValue): _ENV_KEY: str = "NGC_ORG_ID" _ALLOW_NONE: bool = True - class BaseURI(EnvConfigValue): + class BaseURL(EnvConfigValue): _ENV_KEY: str = "NGC_API_BASE" _ALLOW_NONE: bool = True @@ -206,26 +222,11 @@ def __init__(self, *, api_key: APIKey | str = None, org_id: OrgId | str = None, - base_uri: BaseURI | str = None, + base_url: BaseURL | str = None, retry_count=5) -> None: """ Creates a service for interacting with NeMo LLM models. - Parameters - ---------- - api_key : str, optional - The API key for the LLM service, by default None. If `None` the API key will be read from the `NGC_API_KEY` - environment variable. If neither are present an error will be raised., by default None - org_id : str, optional - The organization ID for the LLM service, by default None. If `None` the organization ID will be read from - the `NGC_ORG_ID` environment variable. This value is only required if the account associated with the - `api_key` is a member of multiple NGC organizations., by default None - base_uri : str, optional - The base URI for the LLM service, by default None. If `None` the base URI will be read from - the `NGC_API_BASE` environment variable. This value is only required if the account associated with the - `api_key` is a member of multiple NGC organizations., by default None - retry_count : int, optional - The number of times to retry a request before raising an exception, by default 5 """ @@ -240,22 +241,25 @@ def __init__(self, if not isinstance(org_id, NeMoLLMService.OrgId): org_id = NeMoLLMService.OrgId(org_id) - if not isinstance(base_uri, NeMoLLMService.BaseURI): - base_uri = NeMoLLMService.BaseURI(base_uri) + if not isinstance(base_url, NeMoLLMService.BaseURL): + base_url = NeMoLLMService.BaseURL(base_url) + self._api_key = api_key + self._org_id = org_id + self._base_url = base_url self._retry_count = retry_count self._conn = nemollm.NemoLLM( - api_host=base_uri.value, + api_host=self._base_url.value, # The client must configure the authentication and authorization parameters # in accordance with the API server security policy. # Configure Bearer authorization - api_key=api_key.value, + api_key=self._api_key.value, # If you are in more than one LLM-enabled organization, you must # specify your org ID in the form of a header. This is optional # if you are only in one LLM-enabled org. - org_id=org_id.value, + org_id=self._org_id.value, ) def get_client(self, *, model_name: str, **model_kwargs) -> NeMoLLMClient: diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index 0915975aba..9a30daf3f8 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -13,11 +13,11 @@ # limitations under the License. 
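# A minimal sketch of the EnvConfigValue helper that the nested APIKey/OrgId/
# BaseURL classes above derive from. The real implementation lives in
# morpheus/utils/env_config_value.py and is not shown in this patch; the
# behavior below (an explicit value wins, otherwise the _ENV_KEY environment
# variable is consulted, and None is only accepted when _ALLOW_NONE is set) is
# an assumption inferred from the call sites, not the actual code.
import os


class EnvConfigValue:

    _ENV_KEY: str = None
    _ALLOW_NONE: bool = False

    def __init__(self, value: str = None):

        # An explicitly supplied value takes precedence over the environment
        if value is None and self._ENV_KEY is not None:
            value = os.environ.get(self._ENV_KEY, None)

        if value is None and not self._ALLOW_NONE:
            raise ValueError(f"A value must be supplied, or the {self._ENV_KEY} environment variable must be set")

        self._value = value

    @property
    def value(self) -> str:
        return self._value


# Under these semantics, NeMoLLMService(api_key="my-ngc-key") and an unset
# argument with NGC_API_KEY exported resolve to the same configuration, which
# is the intent of the refactor above.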
import logging -import os import typing from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.llm_service import LLMService +from morpheus.utils.env_config_value import EnvConfigValue logger = logging.getLogger(__name__) @@ -64,8 +64,8 @@ def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model chat_kwargs = { "model": model_name, - "api_key": self._parent._api_key, - "base_url": self._parent._base_url, + "api_key": self._parent._api_key.value, + "base_url": self._parent._base_url.value, } # Remove None values set by the environment in the kwargs @@ -160,7 +160,7 @@ async def generate_batch_async(self, async def generate_batch_async(self, inputs: dict[str, list], - return_exceptions: typing.Literal[True] = True, + return_exceptions=True, **kwargs) -> list[str] | list[str | BaseException]: """ Issue an asynchronous request to generate a list of responses based on a list of prompts. @@ -194,37 +194,39 @@ async def generate_batch_async(self, class NVFoundationLLMService(LLMService): """ A service for interacting with NeMo LLM models, this class should be used to create a client for a specific model. + Parameters ---------- api_key : str, optional - The API key for the LLM service, by default None. If `None` the API key will be read from the `NGC_API_KEY` - environment variable. If neither are present an error will be raised. - org_id : str, optional - The organization ID for the LLM service, by default None. If `None` the organization ID will be read from the - `NGC_ORG_ID` environment variable. This value is only required if the account associated with the `api_key` is - a member of multiple NGC organizations. + The API key for the LLM service, by default None. If `None` the API key will be read from the `NVIDIA_API_KEY` + environment variable. If neither are present an error will be raised, by default None base_url : str, optional - The api host url, by default None. If `None` the url will be read from the `NVAI_BASE_URL` environment - variable. If neither are present `https://api.nvcf.nvidia.com/v2/nvcf` will be used by langchain. + The api host url, by default None. If `None` the url will be read from the `NVIDIA_API_BASE` environment + variable. If neither are present the NeMo default will be used, by default None """ - def __init__(self, *, api_key: str = None, base_url: str = None, **model_kwargs) -> None: + class APIKey(EnvConfigValue): + _ENV_KEY: str = "NVIDIA_API_KEY" + _ALLOW_NONE: bool = True + + class BaseURL(EnvConfigValue): + _ENV_KEY: str = "NVIDIA_API_BASE" + _ALLOW_NONE: bool = True + + def __init__(self, *, api_key: APIKey | str = None, base_url: BaseURL | str = None, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() - # Set the base url from the environment if not provided. Default to None to allow the client to set the url. 
- if base_url is None: - self._base_url = os.getenv('NVIDIA_API_BASE', None) - else: - self._base_url = base_url + if not isinstance(api_key, NVFoundationLLMService.APIKey): + api_key = NVFoundationLLMService.APIKey(api_key) - if api_key is None: - self._api_key = os.getenv('NVIDIA_API_KEY', None) - else: - self._api_key = api_key + if not isinstance(base_url, NVFoundationLLMService.BaseURL): + base_url = NVFoundationLLMService.BaseURL(base_url) + self._api_key = api_key + self._base_url = base_url self._default_model_kwargs = model_kwargs def _merge_model_kwargs(self, model_kwargs: dict) -> dict: @@ -232,7 +234,7 @@ def _merge_model_kwargs(self, model_kwargs: dict) -> dict: @property def api_key(self): - return self._api_key + return self._api_key.value def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClient: """ diff --git a/morpheus/llm/services/openai_chat_service.py b/morpheus/llm/services/openai_chat_service.py index 76d45441f0..8fe1919a90 100644 --- a/morpheus/llm/services/openai_chat_service.py +++ b/morpheus/llm/services/openai_chat_service.py @@ -68,20 +68,6 @@ def set_output(self, output: typing.Any) -> None: self.outputs = output -class OpenAIOrgId(EnvConfigValue): - _ENV_KEY: str = "OPENAI_ORG_ID" - _ALLOW_NONE: bool = True - - -class OpenAIAPIKey(EnvConfigValue): - _ENV_KEY: str = "OPENAI_API_KEY" - - -class OpenAIBaseURL(EnvConfigValue): - _ENV_KEY: str = "OPENAI_BASE_URL" - _ALLOW_NONE: bool = True - - class OpenAIChatClient(LLMClient): """ Client for interacting with a specific OpenAI chat model. This class should be constructed with the @@ -89,6 +75,9 @@ class OpenAIChatClient(LLMClient): Parameters ---------- + parent : OpenAIChatService + The parent service for this client. + model_name : str The name of the model to interact with. @@ -115,9 +104,6 @@ def __init__(self, model_name: str, set_assistant: bool = False, max_retries: int = 10, - org_id: str | OpenAIOrgId = None, - api_key: str | OpenAIAPIKey = None, - base_url: str | OpenAIBaseURL = None, json=False, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: @@ -125,15 +111,6 @@ def __init__(self, super().__init__() - if not isinstance(org_id, OpenAIOrgId): - org_id = OpenAIOrgId(org_id) - - if not isinstance(api_key, OpenAIOrgId): - api_key = OpenAIOrgId(api_key) - - if not isinstance(base_url, OpenAIBaseURL): - base_url = OpenAIBaseURL(base_url) - assert parent is not None, "Parent service cannot be None." self._parent = parent @@ -152,13 +129,13 @@ def __init__(self, # Create the client objects for both sync and async self._client = openai.OpenAI(max_retries=max_retries, - organization=org_id.value, - api_key=api_key.value, - base_url=base_url.value) + organization=self._parent._org_id.value, + api_key=self._parent._api_key.value, + base_url=self._parent._base_url.value) self._client_async = openai.AsyncOpenAI(max_retries=max_retries, - organization=org_id.value, - api_key=api_key.value, - base_url=base_url.value) + organization=self._parent._org_id.value, + api_key=self._parent._api_key.value, + base_url=self._parent._base_url.value) def get_input_names(self) -> list[str]: input_names = [self._prompt_key] @@ -358,39 +335,62 @@ async def generate_batch_async(self, class OpenAIChatService(LLMService): """ A service for interacting with OpenAI Chat models, this class should be used to create clients. + + Parameters + ---------- + api_key : str, optional + The API key for the LLM service, by default None. 
If `None` the API key will be read from the + `OPENAI_API_KEY` environment variable. If neither are present an error will be raised. + org_id : str, optional + The organization ID for the LLM service, by default None. If `None` the organization ID will be read from + the `OPENAI_ORG_ID` environment variable. This value is only required if the account associated with the + `api_key` is a member of multiple organizations, by default None + base_url : str, optional + The api host url, by default None. If `None` the url will be read from the `OPENAI_BASE_URL` environment + variable. If neither are present the OpenAI default will be used, by default None + default_model_kwargs : dict, optional + Default arguments to use when creating a client via the `get_client` function. Any argument specified here + will automatically be used when calling `get_client`. Arguments specified in the `get_client` function will + overwrite default values specified here. This is useful to set model arguments before creating multiple + clients. By default None + """ - def __init__(self, *, api_key: str = None, base_url: str = None, default_model_kwargs: dict = None) -> None: - """ - Creates a service for interacting with OpenAI Chat models, this class should be used to create clients. + class APIKey(EnvConfigValue): + _ENV_KEY: str = "OPENAI_API_KEY" + + class OrgId(EnvConfigValue): + _ENV_KEY: str = "OPENAI_ORG_ID" + _ALLOW_NONE: bool = True + + class BaseURL(EnvConfigValue): + _ENV_KEY: str = "OPENAI_BASE_URL" + _ALLOW_NONE: bool = True + + def __init__(self, + *, + api_key: APIKey | str = None, + org_id: OrgId | str = None, + base_url: BaseURL | str = None, + default_model_kwargs: dict = None) -> None: - Parameters - ---------- - api_key : str, optional - The API key for the LLM service, by default None. If `None` the API key will be read from the - `OPENAI_API_KEY` environment variable. If neither are present an error will be raised. - base_url : str, optional - The api host url, by default None. If `None` the url will be read from the `OPENAI_BASE_URL` environment - variable. If neither are present the OpenAI default will be used., by default None - default_model_kwargs : dict, optional - Default arguments to use when creating a client via the `get_client` function. Any argument specified here - will automatically be used when calling `get_client`. Arguments specified in the `get_client` function will - overwrite default values specified here. This is useful to set model arguments before creating multiple - clients. By default None - - Raises - ------ - ImportError - If the `openai` library is not found in the python environment. 
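An illustrative sketch of the refactored call pattern (the key and model name
below are placeholders): credentials are resolved once on the service, and every
client created from it inherits them:

    service = OpenAIChatService(api_key="sk-example",  # or omit and export OPENAI_API_KEY
                                default_model_kwargs={"temperature": 0.0})

    # Clients no longer accept api_key/org_id/base_url directly; they read them
    # from the parent service.
    client = service.get_client(model_name="gpt-4o-mini")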
- """ if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() + if not isinstance(api_key, OpenAIChatService.APIKey): + api_key = OpenAIChatService.APIKey(api_key) + + if not isinstance(org_id, OpenAIChatService.OrgId): + org_id = OpenAIChatService.OrgId(org_id) + + if not isinstance(base_url, OpenAIChatService.BaseURL): + base_url = OpenAIChatService.BaseURL(base_url) + self._api_key = api_key + self._org_id = org_id self._base_url = base_url - self._default_model_kwargs = default_model_kwargs or {} self._logger = logging.getLogger(f"{__package__}.{OpenAIChatService.__name__}") diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py index 4f229cc1de..fe0f898eb7 100644 --- a/tests/test_faiss_vector_db_service.py +++ b/tests/test_faiss_vector_db_service.py @@ -16,7 +16,7 @@ import pytest from langchain_community.docstore.in_memory import InMemoryDocstore -from langchain_community.vectorstores import FAISS +from langchain_community.vectorstores.faiss import FAISS from langchain_core.documents import Document from _utils.faiss import FakeEmbedder From ceeafb10474f7ee167f60521882405ff3d7a1723 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 11 Jun 2024 09:08:41 -0700 Subject: [PATCH 07/12] Style cleanup --- conda/environments/all_cuda-121_arch-x86_64.yaml | 3 +++ conda/environments/dev_cuda-121_arch-x86_64.yaml | 3 +++ conda/environments/examples_cuda-121_arch-x86_64.yaml | 3 +++ conda/environments/runtime_cuda-121_arch-x86_64.yaml | 3 +++ dependencies.yaml | 6 +++--- morpheus/llm/nodes/langchain_agent_node.py | 9 +++++++-- morpheus/messages/message_base.py | 3 +-- 7 files changed, 23 insertions(+), 7 deletions(-) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index 3b310995fb..d99bafc3f5 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -122,7 +122,10 @@ dependencies: - databricks-connect - dgl==2.0.0 - dglgo + - faiss-gpu==1.7 - google-search-results==2.4 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.9 - milvus==2.3.5 - nemollm diff --git a/conda/environments/dev_cuda-121_arch-x86_64.yaml b/conda/environments/dev_cuda-121_arch-x86_64.yaml index 23ff2c707e..075de86f16 100644 --- a/conda/environments/dev_cuda-121_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-121_arch-x86_64.yaml @@ -98,6 +98,9 @@ dependencies: - PyMuPDF==1.23.21 - databricks-cli < 0.100 - databricks-connect + - faiss-gpu==1.7 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index 11d5e535ce..5247c10145 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -66,7 +66,10 @@ dependencies: - databricks-connect - dgl==2.0.0 - dglgo + - faiss-gpu==1.7 - google-search-results==2.4 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.9 - milvus==2.3.5 - nemollm diff --git a/conda/environments/runtime_cuda-121_arch-x86_64.yaml b/conda/environments/runtime_cuda-121_arch-x86_64.yaml index 80f6f995d2..1bd5996236 100644 --- a/conda/environments/runtime_cuda-121_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-121_arch-x86_64.yaml @@ -40,6 +40,9 @@ dependencies: - 
pip: - databricks-cli < 0.100 - databricks-connect + - faiss-gpu==1.7 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 - pymilvus==2.3.6 name: runtime_cuda-121_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 55b917ffcb..b94b521e77 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -274,11 +274,11 @@ dependencies: - pip: - databricks-cli < 0.100 - databricks-connect + - faiss-gpu==1.7 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 # update to match pymilvus when available - pymilvus==2.3.6 - - langchain-nvidia-ai-endpoints - - langchain-community - - faiss-gpu test_python_morpheus: common: diff --git a/morpheus/llm/nodes/langchain_agent_node.py b/morpheus/llm/nodes/langchain_agent_node.py index 8ab772c07e..f3b76208b1 100644 --- a/morpheus/llm/nodes/langchain_agent_node.py +++ b/morpheus/llm/nodes/langchain_agent_node.py @@ -111,8 +111,13 @@ async def execute(self, context: LLMContext) -> LLMContext: # pylint: disable=i # If the agent encounters a parsing error or a server error after retries, replace the error # with a default value to prevent the pipeline from crashing results[i][j] = self._replace_exceptions_value - logger.warning(f"Exception encountered in result[{i}][{j}]: {answer}. " - f"Replacing with default message: \"{self._replace_exceptions_value}\".") + logger.warning( + "Exception encountered in result[%d][%d]: %s. " + "Replacing with default message: '%s'.", + i, + j, + answer, + self._replace_exceptions_value) context.set_output(results) diff --git a/morpheus/messages/message_base.py b/morpheus/messages/message_base.py index 3e8a19385f..8edcc751de 100644 --- a/morpheus/messages/message_base.py +++ b/morpheus/messages/message_base.py @@ -21,7 +21,6 @@ from morpheus import messages from morpheus.config import CppConfig -from morpheus.messages import ControlMessage from morpheus.utils import logger as morpheus_logger @@ -51,7 +50,7 @@ def _internal_new(other_cls, *args, **kwargs): # Instantiating MultiMessage and its subclasses from Python or C++ will generate a deprecation warning if issubtype(other_cls, messages.MultiMessage): - morpheus_logger.deprecated_message_warning(other_cls, ControlMessage) + morpheus_logger.deprecated_message_warning(other_cls, messages.ControlMessage) # If _cpp_class is set, and use_cpp is enabled, create the C++ instance if (getattr(other_cls, "_cpp_class", None) is not None and CppConfig.get_should_use_cpp()): From 0500fbf842d340ef6c527926f989f43b47f5877f Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 11 Jun 2024 09:10:44 -0700 Subject: [PATCH 08/12] Removing separate dependency group for all of the LLM examples. 
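Regarding the `langchain_agent_node.py` hunk in the previous patch: handing the
arguments to `logger.warning` instead of pre-building an f-string defers
interpolation until the record is actually emitted, which is also what pylint's
logging checks expect. A minimal, self-contained illustration:

    import logging

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)

    # The f-string is formatted even though WARNING is filtered out here...
    logger.warning(f"Exception encountered in result[{0}][{1}]: {'error'}")

    # ...while %-style arguments are only interpolated if the record is emitted.
    logger.warning("Exception encountered in result[%d][%d]: %s", 0, 1, "error")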
--- .../all_cuda-121_arch-x86_64.yaml | 13 ++- .../dev_cuda-121_arch-x86_64.yaml | 5 -- .../examples_cuda-121_arch-x86_64.yaml | 13 ++- .../runtime_cuda-121_arch-x86_64.yaml | 5 -- dependencies.yaml | 80 +++++-------------- .../utils/langchain_llm_client_wrapper.py | 17 +--- 6 files changed, 36 insertions(+), 97 deletions(-) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index d99bafc3f5..267efb662f 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -13,7 +13,7 @@ dependencies: - appdirs - arxiv=1.4 - automake -- beautifulsoup4 +- beautifulsoup4=4.12 - benchmark=1.8.3 - boost-cpp=1.84 - boto3 @@ -69,7 +69,7 @@ dependencies: - numexpr - numpydoc=1.5 - nvtabular=23.08.00 -- onnx +- onnx=1.15 - openai=1.13 - papermill=2.4.0 - pip @@ -95,11 +95,11 @@ dependencies: - rdma-core>=48 - requests - requests-cache=1.1 -- requests-toolbelt +- requests-toolbelt=1.0 - s3fs=2023.12.2 - scikit-build=0.17.6 - scikit-learn=1.3.2 -- sentence-transformers +- sentence-transformers=2.7 - sphinx - sphinx_rtd_theme - sqlalchemy @@ -124,11 +124,10 @@ dependencies: - dglgo - faiss-gpu==1.7 - google-search-results==2.4 - - langchain-community - langchain-nvidia-ai-endpoints==0.0.11 - - langchain==0.1.9 + - langchain==0.1.16 - milvus==2.3.5 - - nemollm + - nemollm==0.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 name: all_cuda-121_arch-x86_64 diff --git a/conda/environments/dev_cuda-121_arch-x86_64.yaml b/conda/environments/dev_cuda-121_arch-x86_64.yaml index 075de86f16..55ada60795 100644 --- a/conda/environments/dev_cuda-121_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-121_arch-x86_64.yaml @@ -11,7 +11,6 @@ channels: dependencies: - appdirs - automake -- beautifulsoup4 - benchmark=1.8.3 - boost-cpp=1.84 - breathe=4.35.0 @@ -78,7 +77,6 @@ dependencies: - rdma-core>=48 - requests - requests-cache=1.1 -- requests-toolbelt - scikit-build=0.17.6 - scikit-learn=1.3.2 - sphinx @@ -98,9 +96,6 @@ dependencies: - PyMuPDF==1.23.21 - databricks-cli < 0.100 - databricks-connect - - faiss-gpu==1.7 - - langchain-community - - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index 5247c10145..a56c41c20b 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -12,7 +12,7 @@ dependencies: - anyio>=3.7 - appdirs - arxiv=1.4 -- beautifulsoup4 +- beautifulsoup4=4.12 - boto3 - click >=8 - cuml=24.02.* @@ -35,7 +35,7 @@ dependencies: - numexpr - numpydoc=1.5 - nvtabular=23.08.00 -- onnx +- onnx=1.15 - openai=1.13 - papermill=2.4.0 - pip @@ -48,10 +48,10 @@ dependencies: - pytorch=*=*cuda* - requests - requests-cache=1.1 -- requests-toolbelt +- requests-toolbelt=1.0 - s3fs=2023.12.2 - scikit-learn=1.3.2 -- sentence-transformers +- sentence-transformers=2.7 - sqlalchemy - tqdm=4 - transformers=4.36.2 @@ -68,10 +68,9 @@ dependencies: - dglgo - faiss-gpu==1.7 - google-search-results==2.4 - - langchain-community - langchain-nvidia-ai-endpoints==0.0.11 - - langchain==0.1.9 + - langchain==0.1.16 - milvus==2.3.5 - - nemollm + - nemollm==0.3.5 - pymilvus==2.3.6 name: examples_cuda-121_arch-x86_64 diff --git a/conda/environments/runtime_cuda-121_arch-x86_64.yaml b/conda/environments/runtime_cuda-121_arch-x86_64.yaml index 1bd5996236..e6b76b43aa 100644 --- 
a/conda/environments/runtime_cuda-121_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-121_arch-x86_64.yaml @@ -10,7 +10,6 @@ channels: - pytorch dependencies: - appdirs -- beautifulsoup4 - click >=8 - datacompy=0.10 - dill=0.3.7 @@ -30,7 +29,6 @@ dependencies: - pytorch=*=*cuda* - requests - requests-cache=1.1 -- requests-toolbelt - scikit-learn=1.3.2 - sqlalchemy - tqdm=4 @@ -40,9 +38,6 @@ dependencies: - pip: - databricks-cli < 0.100 - databricks-connect - - faiss-gpu==1.7 - - langchain-community - - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 - pymilvus==2.3.6 name: runtime_cuda-121_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index b94b521e77..9011d0b974 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -32,10 +32,7 @@ files: - docs - example-dfp-prod - example-gnn - - example-llm-agents - - example-llm-completion - - example-llm-rag - - example-llm-vdb-upload + - example-llms - python - runtime - test_python_morpheus @@ -86,10 +83,7 @@ files: - development - example-dfp-prod - example-gnn - - example-llm-agents - - example-llm-completion - - example-llm-rag - - example-llm-vdb-upload + - example-llms - python - runtime - test_python_morpheus @@ -107,10 +101,7 @@ files: - docs - example-dfp-prod - example-gnn - - example-llm-agents - - example-llm-completion - - example-llm-rag - - example-llm-vdb-upload + - example-llms - python - runtime @@ -132,10 +123,7 @@ files: - cve-mitigation - example-dfp-prod - example-gnn - - example-llm-agents - - example-llm-completion - - example-llm-rag - - example-llm-vdb-upload + - example-llms - python - runtime @@ -249,7 +237,6 @@ dependencies: - &dill dill=0.3.7 - &scikit-learn scikit-learn=1.3.2 - appdirs - - beautifulsoup4 - datacompy=0.10 - elasticsearch==8.9.0 - feedparser=6.0.10 @@ -264,7 +251,6 @@ dependencies: - pytorch=*=*cuda* - requests - requests-cache=1.1 - - requests-toolbelt # Transitive dep needed by nemollm, specified here to ensure we get a compatible version - sqlalchemy - tqdm=4 - typing_utils=0.1 @@ -274,9 +260,6 @@ dependencies: - pip: - databricks-cli < 0.100 - databricks-connect - - faiss-gpu==1.7 - - langchain-community - - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 # update to match pymilvus when available - pymilvus==2.3.6 @@ -321,55 +304,32 @@ dependencies: - dgl==2.0.0 - dglgo - example-llm-agents: + example-llms: common: - output_types: [conda] packages: - - &grpcio-status grpcio-status==1.59 - &transformers transformers=4.36.2 # newer versions are incompatible with our pinned version of huggingface_hub - - huggingface_hub=0.20.2 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762 - - numexpr - - sentence-transformers - - pip - - pip: - - &langchain langchain==0.1.9 - - nemollm - - example-llm-completion: - common: - - output_types: [conda] - packages: - - *grpcio-status - - &arxiv arxiv=1.4 - - &newspaper3k newspaper3k=0.2 - - &pypdf pypdf=3.17.4 - - example-llm-rag: - common: - - output_types: [conda] - packages: - - *grpcio-status - anyio>=3.7 + - arxiv=1.4 + - beautifulsoup4=4.12 + - grpcio-status==1.59 + - huggingface_hub=0.20.2 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762 - jsonpatch>=1.33 + - newspaper3k=0.2 + - numexpr + - onnx=1.15 - openai=1.13 + - pypdf=3.17.4 + - requests-toolbelt=1.0 # Transitive dep needed by nemollm, specified here to ensure we get a compatible version + - sentence-transformers=2.7 - pip - pip: - - *langchain + - faiss-gpu==1.7 - google-search-results==2.4 - - 
example-llm-vdb-upload: - common: - - output_types: [conda] - packages: - - *arxiv - - *grpcio-status - - *newspaper3k - - *pypdf - - onnx - - pip - - pip: - - PyMuPDF==1.23.21 - - *langchain + - langchain-nvidia-ai-endpoints==0.0.11 + - langchain==0.1.16 + - nemollm==0.3.5 + - PyMuPDF==1.23.21 model-training-tuning: common: diff --git a/morpheus/llm/services/utils/langchain_llm_client_wrapper.py b/morpheus/llm/services/utils/langchain_llm_client_wrapper.py index 1215ab35b8..80a30925ac 100644 --- a/morpheus/llm/services/utils/langchain_llm_client_wrapper.py +++ b/morpheus/llm/services/utils/langchain_llm_client_wrapper.py @@ -14,20 +14,11 @@ import typing -from morpheus.llm.services.llm_service import LLMClient +from langchain_core.callbacks import AsyncCallbackManagerForLLMRun +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.language_models.llms import LLM -IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = ("LangchainLLMClientWrapper require the langchain package to be installed. " - "Install it by running the following command:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file morpheus/conda/environments/examples_cuda-121_arch-x86_64.yaml --prune`") - -try: - from langchain_core.callbacks import AsyncCallbackManagerForLLMRun - from langchain_core.callbacks import CallbackManagerForLLMRun - from langchain_core.language_models.llms import LLM -except ImportError as import_exc: - IMPORT_EXCEPTION = import_exc +from morpheus.llm.services.llm_service import LLMClient class LangchainLLMClientWrapper(LLM): From fa40b2e764e909f528ed88961fc8619a6172b06e Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 11 Jun 2024 16:12:12 -0700 Subject: [PATCH 09/12] Final cleanup --- morpheus.code-workspace | 4 +- .../llm/services/nvfoundation_llm_service.py | 12 +- morpheus/service/vdb/faiss_vdb_service.py | 315 +++++++----------- tests/_utils/faiss.py | 35 -- tests/conftest.py | 16 - tests/test_faiss_vector_db_service.py | 176 +++++----- 6 files changed, 214 insertions(+), 344 deletions(-) delete mode 100644 tests/_utils/faiss.py diff --git a/morpheus.code-workspace b/morpheus.code-workspace index 5154095574..f81d904f8e 100644 --- a/morpheus.code-workspace +++ b/morpheus.code-workspace @@ -27,7 +27,6 @@ "launch": { "compounds": [], "configurations": [ - { "args": [ "--log_level=DEBUG", @@ -732,7 +731,6 @@ ], "yapf.args": [ "--style=${workspaceFolder}/setup.cfg" - ], - "python.analysis.inlayHints.pytestParameters": true + ] } } diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index 9a30daf3f8..5154158538 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -41,7 +41,7 @@ class NVFoundationLLMClient(LLMClient): `NeMoLLMService.get_client` method. Parameters ---------- - parent : NVFoundationMService + parent : NVFoundationMService The parent service for this client. model_name : str The name of the model to interact with. @@ -127,8 +127,8 @@ def generate_batch(self, Inputs containing prompt data. return_exceptions : bool Whether to return exceptions in the output list or raise them immediately. - **kwargs : dict - Additional keyword arguments for generate batch. + **kwargs + Additional keyword arguments for generate batch. """ prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] @@ -171,8 +171,8 @@ async def generate_batch_async(self, Inputs containing prompt data. 
return_exceptions : bool Whether to return exceptions in the output list or raise them immediately. - **kwargs: dict - Additional keyword arguments for generate batch async. + **kwargs + Additional keyword arguments for generate batch async. """ prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] @@ -202,7 +202,7 @@ class NVFoundationLLMService(LLMService): environment variable. If neither are present an error will be raised, by default None base_url : str, optional The api host url, by default None. If `None` the url will be read from the `NVIDIA_API_BASE` environment - variable. If neither are present the NeMo default will be used, by default None + variable. If neither are present the NVIDIA default will be used, by default None """ class APIKey(EnvConfigValue): diff --git a/morpheus/service/vdb/faiss_vdb_service.py b/morpheus/service/vdb/faiss_vdb_service.py index a23d254819..7b7d3362bd 100644 --- a/morpheus/service/vdb/faiss_vdb_service.py +++ b/morpheus/service/vdb/faiss_vdb_service.py @@ -27,9 +27,10 @@ logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = "FaissDBResourceService requires the FAISS." +IMPORT_ERROR_MESSAGE = "FaissDBResourceService requires the FAISS library to be installed." try: + from langchain.embeddings.base import Embeddings from langchain.vectorstores.faiss import FAISS except ImportError as import_exc: IMPORT_EXCEPTION = import_exc @@ -37,14 +38,14 @@ class FaissVectorDBResourceService(VectorDBResourceService): """ - Represents a service for managing resources in a Milvus Vector Database. + Represents a service for managing resources in a FAISS Vector Database. Parameters ---------- + parent : FaissVectorDBService + The parent service for this resource. name : str - Name of the resource. - client : MilvusClient - An instance of the MilvusClient for interaction with the Milvus Vector Database. + The name of the resource. """ def __init__(self, parent: "FaissVectorDBService", *, name: str) -> None: @@ -54,14 +55,15 @@ def __init__(self, parent: "FaissVectorDBService", *, name: str) -> None: super().__init__() self._parent = parent - self._name = name + self._folder_path = self._parent._local_dir + self._index_name = name self._index = FAISS.load_local(folder_path=self._parent._local_dir, embeddings=self._parent._embeddings, - index_name=self._name, + index_name=self._index_name, allow_dangerous_deserialization=True) - def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) -> dict: + def insert(self, data: list[list] | list[dict], **kwargs) -> dict: """ Insert data into the vector database. @@ -69,7 +71,7 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) ---------- data : list[list] | list[dict] Data to be inserted into the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -77,10 +79,9 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) dict Returns response content as a dictionary. """ - self._index.add_embeddings(data) - return {"status": "success"} + raise NotImplementedError("Insert operation is not supported in FAISS") - def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs: dict[str, typing.Any]) -> dict: + def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs) -> dict: """ Insert a dataframe entires into the vector database. 
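Taken together, the hunks above and below make the FAISS-backed resource service
read-only: `insert`, `insert_dataframe`, `update`, `delete` and `delete_by_keys`
now raise `NotImplementedError`, while `describe`, `count` and the async
`similarity_search` operate on the index loaded in `__init__`. An illustrative
sketch of the supported surface (the folder path and index name are
placeholders, and `my_embeddings` stands in for any langchain `Embeddings`
implementation matching the dimensionality of the stored index):

    import asyncio

    async def main():
        service = FaissVectorDBService(local_dir="/tmp/faiss_store", embeddings=my_embeddings)
        resource = service.load_resource("index")

        print(resource.describe())  # {'index_name': 'index', 'folder_path': '/tmp/faiss_store'}
        print(resource.count())     # number of vectors in the loaded index

        hits = await resource.similarity_search(embeddings=[[0.0] * 1024], k=2)
        print(hits)                 # one result list per query embedding

    asyncio.run(main())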
@@ -88,7 +89,7 @@ def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwa ---------- df : typing.Union[cudf.DataFrame, pd.DataFrame] Dataframe to be inserted into the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -98,13 +99,13 @@ def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwa """ raise NotImplementedError("Insert operation is not supported in FAISS") - def describe(self, **kwargs: dict[str, typing.Any]) -> dict: + def describe(self, **kwargs) -> dict: """ Provides a description of the collection. Parameters ---------- - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -112,42 +113,32 @@ def describe(self, **kwargs: dict[str, typing.Any]) -> dict: dict Returns response content as a dictionary. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + return { + "index_name": self._index_name, + "folder_path": self._folder_path, + } - def query(self, query: str, **kwargs: dict[str, typing.Any]) -> typing.Any: + def query(self, query: str, **kwargs) -> typing.Any: """ - Query data in a collection in the Milvus vector database. - - This method performs a search operation in the specified collection/partition in the Milvus vector database. + Query data in a collection in the vector database. Parameters ---------- query : str, optional The search query, which can be a filter expression, by default None. - **kwargs : dict + **kwargs Additional keyword arguments for the search operation. Returns ------- typing.Any The search result, which can vary depending on the query and options. - - Raises - ------ - RuntimeError - If an error occurs during the search operation. - If query argument is `None` and `data` keyword argument doesn't exist. - If `data` keyword arguement is `None`. """ raise NotImplementedError("Query operation is not supported in FAISS") - async def similarity_search(self, - embeddings: list[list[float]], - k: int = 4, - **kwargs: dict[str, typing.Any]) -> list[list[dict]]: + async def similarity_search(self, embeddings: list[list[float]], k: int = 4, **kwargs) -> list[list[dict]]: """ - Perform a similarity search within the FAISS docstore (asimilarity_search_by_vector - returns docs most similar to embedding vector asynchronously). + Perform a similarity search within the FAISS docstore. Parameters ---------- @@ -155,7 +146,7 @@ async def similarity_search(self, Embeddings for which to perform the similarity search. k : int, optional The number of nearest neighbors to return, by default 4. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -171,7 +162,7 @@ async def single_search(single_embedding): return list(await asyncio.gather(*[single_search(embedding) for embedding in embeddings])) - def update(self, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def update(self, data: list[typing.Any], **kwargs) -> dict[str, typing.Any]: """ Update data in the collection. @@ -179,7 +170,7 @@ def update(self, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dic ---------- data : list[typing.Any] Data to be updated in the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to upsert operation. 
Returns @@ -189,7 +180,7 @@ def update(self, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dic """ raise NotImplementedError("Update operation is not supported in FAISS") - def delete_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> typing.Any: + def delete_by_keys(self, keys: int | str | list, **kwargs) -> typing.Any: """ Delete vectors by keys from the collection. @@ -197,7 +188,7 @@ def delete_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any] ---------- keys : int | str | list Primary keys to delete vectors. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -207,7 +198,7 @@ def delete_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any] """ raise NotImplementedError("Delete by keys operation is not supported in FAISS") - def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def delete(self, expr: str, **kwargs) -> dict[str, typing.Any]: """ Delete vectors by giving a list of IDs. @@ -215,7 +206,7 @@ def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing ---------- expr : str Delete expression. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -223,10 +214,9 @@ def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing dict[str, typing.Any] Returns result of the given keys that are deleted from the collection. """ - self._index.delete(expr) - return {"status": "success"} + raise NotImplementedError("delete operation is not supported in FAISS") - def retrieve_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> list[typing.Any]: + def retrieve_by_keys(self, keys: int | str | list, **kwargs) -> list[typing.Any]: """ Retrieve the inserted vectors using their primary keys. @@ -235,7 +225,7 @@ def retrieve_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.An keys : int | str | list Primary keys to get vectors for. Depending on pk_field type it can be int or str or a list of either. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments for the retrieval operation. Returns @@ -245,13 +235,13 @@ def retrieve_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.An """ raise NotImplementedError("Retrieve by keys operation is not supported in FAISS") - def count(self, **kwargs: dict[str, typing.Any]) -> int: + def count(self, **kwargs) -> int: """ Returns number of rows/entities. Parameters ---------- - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments for the count operation. Returns @@ -259,19 +249,17 @@ def count(self, **kwargs: dict[str, typing.Any]) -> int: int Returns number of entities in the collection. """ - docstore = self._parent._local_dir - count = len(docstore) - return count + return self._index.index.ntotal - def drop(self, **kwargs: dict[str, typing.Any]) -> None: + def drop(self, **kwargs) -> None: """ - Drop a collection, index, or partition in the Milvus vector database. + Drops the resource from the vector database service. This function allows you to drop a collection. Parameters ---------- - **kwargs : dict + **kwargs Additional keyword arguments for specifying the type and partition name (if applicable). 
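        Raises
        ------
        NotImplementedError
            Dropping is not supported by the FAISS implementation.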
""" raise NotImplementedError("Drop operation is not supported in FAISS") @@ -279,26 +267,22 @@ def drop(self, **kwargs: dict[str, typing.Any]) -> None: class FaissVectorDBService(VectorDBService): """ - Service class for Milvus Vector Database implementation. This class provides functions for interacting - with a Milvus vector database. + Service class for FAISS Vector Database implementation. This class provides functions for interacting + with a FAISS vector database. Parameters ---------- - host : str - The hostname or IP address of the Milvus server. - port : str - The port number for connecting to the Milvus server. - alias : str, optional - Alias for the Milvus connection, by default "default". - **kwargs : dict - Additional keyword arguments specific to the Milvus connection configuration. + local_dir : str + The local directory where the FAISS index files are stored. + embeddings : Embeddings + The embeddings object to use for embedding text. """ _collection_locks = {} _cleanup_interval = 600 # 10mins _last_cleanup_time = time.time() - def __init__(self, local_dir: str, embeddings): + def __init__(self, local_dir: str, embeddings: "Embeddings"): if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION @@ -306,7 +290,26 @@ def __init__(self, local_dir: str, embeddings): self._local_dir = local_dir self._embeddings = embeddings - def load_resource(self, name: str = "index", **kwargs: dict[str, typing.Any]) -> FaissVectorDBResourceService: + @property + def embeddings(self): + return self._embeddings + + def load_resource(self, name: str = "index", **kwargs) -> FaissVectorDBResourceService: + """ + Loads a VDB resource into memory for use. + + Parameters + ---------- + name : str, optional + The VDB resource to load. For FAISS, this corresponds to the index name, by default "index" + **kwargs + Additional keyword arguments specific to the resource service. + + Returns + ------- + FaissVectorDBResourceService + The loaded resource service. + """ return FaissVectorDBResourceService(self, name=name, **kwargs) @@ -331,27 +334,23 @@ def has_store_object(self, name: str) -> bool: index_name=name, allow_dangerous_deserialization=True) return True - except Exception as e: - print(f"Failed to load FAISS with the given index file name: {e}") - # Return False if given index file name cannot be loaded - return False + except Exception: + return False - def list_store_objects(self, **kwargs: dict[str, typing.Any]) -> list[str]: + def list_store_objects(self, **kwargs) -> list[str]: """ - List the names of all collections in the Milvus vector database. + List the names of all resources in the vector database. Returns ------- list[str] A list of collection names. """ - raise NotImplementedError("Drop operation is not supported in FAISS") + raise NotImplementedError("list_store_objects operation is not supported in FAISS") - def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing.Any]): + def create(self, name: str, overwrite: bool = False, **kwargs): """ - Create a collection in the Milvus vector database with the specified name and configuration. This method - creates a new collection in the Milvus vector database with the provided name and configuration options. - If the collection already exists, it can be overwritten if the `overwrite` parameter is set to True. + Create a collection. Parameters ---------- @@ -359,7 +358,7 @@ def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing. 
Name of the collection to be created. overwrite : bool, optional If True, the collection will be overwritten if it already exists, by default False. - **kwargs : dict + **kwargs Additional keyword arguments containing collection configuration. Raises @@ -367,33 +366,13 @@ def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing. ValueError If the provided schema fields configuration is empty. """ - # can create with: from_embeddings, from_texts, or from_documents - - resource = self.load_resource(name) - - if "documents" in kwargs: - documents = kwargs["documents"] - return resource._index.from_documents(documents, self._embeddings) - - if "text_embeddings" in kwargs: - text_embeddings = kwargs["text_embeddings"] - metadatas = kwargs.get("metadatas") - ids = kwargs.get("ids") - return resource._index.from_embeddings(text_embeddings, self._embeddings, metadatas, ids) - - if "texts" in kwargs: - texts = kwargs["texts"] - metadatas = kwargs.get("metadatas") - ids = kwargs.get("ids") - return resource._index.from_texts(texts, self._embeddings, metadatas, ids) - - raise ValueError("You must provide documents, texts, or text_embeddings along with embeddings in kwargs.") + raise NotImplementedError("create operation is not supported in FAISS") def create_from_dataframe(self, name: str, df: typing.Union[cudf.DataFrame, pd.DataFrame], overwrite: bool = False, - **kwargs: dict[str, typing.Any]) -> None: + **kwargs) -> None: """ Create collections in the vector database. @@ -405,16 +384,15 @@ def create_from_dataframe(self, The dataframe to create the collection from. overwrite : bool, optional Whether to overwrite the collection if it already exists. Default is False. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + raise NotImplementedError("create_from_dataframe operation is not supported in FAISS") - def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, - typing.Any]) -> dict[str, typing.Any]: + def insert(self, name: str, data: list[list] | list[dict], **kwargs) -> dict[str, typing.Any]: """ - Insert a collection specific data in the Milvus vector database. + Insert a collection specific data in the vector database. Parameters ---------- @@ -422,7 +400,7 @@ def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, Name of the collection to be inserted. data : list[list] | list[dict] Data to be inserted in the collection. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments containing collection configuration. Returns @@ -436,16 +414,12 @@ def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, If the collection not exists exists. """ - resource = self.load_resource(name) - - return resource.insert(data, **kwargs) + raise NotImplementedError("create_from_dataframe operation is not supported in FAISS") - def insert_dataframe(self, - name: str, - df: typing.Union[cudf.DataFrame, pd.DataFrame], - **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def insert_dataframe(self, name: str, df: typing.Union[cudf.DataFrame, pd.DataFrame], + **kwargs) -> dict[str, typing.Any]: """ - Converts dataframe to rows and insert to a collection in the Milvus vector database. + Converts dataframe to rows and insert to the vector database. Parameters ---------- @@ -453,7 +427,7 @@ def insert_dataframe(self, Name of the collection to be inserted. 
df : typing.Union[cudf.DataFrame, pd.DataFrame] Dataframe to be inserted in the collection. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments containing collection configuration. Returns @@ -466,15 +440,11 @@ def insert_dataframe(self, RuntimeError If the collection not exists exists. """ - resource = self.load_resource(name) - - return resource.insert_dataframe(df=df, **kwargs) + raise NotImplementedError("insert_dataframe operation is not supported in FAISS") - def query(self, name: str, query: str = None, **kwargs: dict[str, typing.Any]) -> typing.Any: + def query(self, name: str, query: str = None, **kwargs) -> typing.Any: """ - Query data in a collection in the Milvus vector database. - - This method performs a search operation in the specified collection/partition in the Milvus vector database. + Query data in a vector database. Parameters ---------- @@ -482,7 +452,7 @@ def query(self, name: str, query: str = None, **kwargs: dict[str, typing.Any]) - Name of the collection to search within. query : str The search query, which can be a filter expression. - **kwargs : dict + **kwargs Additional keyword arguments for the search operation. Returns @@ -491,11 +461,9 @@ def query(self, name: str, query: str = None, **kwargs: dict[str, typing.Any]) - The search result, which can vary depending on the query and options. """ - resource = self.load_resource(name) - - return resource.query(query, **kwargs) + raise NotImplementedError("query operation is not supported in FAISS") - async def similarity_search(self, name: str, **kwargs: dict[str, typing.Any]) -> list[dict]: + async def similarity_search(self, name: str, **kwargs) -> list[dict]: """ Perform a similarity search within the collection. @@ -503,7 +471,7 @@ async def similarity_search(self, name: str, **kwargs: dict[str, typing.Any]) -> ---------- name : str Name of the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -512,11 +480,9 @@ async def similarity_search(self, name: str, **kwargs: dict[str, typing.Any]) -> Returns a list of dictionaries representing the results of the similarity search. """ - resource = self.load_resource(name) + raise NotImplementedError("similarity_search operation is not supported in FAISS") - return resource.similarity_search(**kwargs) - - def update(self, name: str, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def update(self, name: str, data: list[typing.Any], **kwargs) -> dict[str, typing.Any]: """ Update data in the vector database. @@ -526,7 +492,7 @@ def update(self, name: str, data: list[typing.Any], **kwargs: dict[str, typing.A Name of the collection. data : list[typing.Any] Data to be updated in the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to upsert operation. Returns @@ -535,14 +501,9 @@ def update(self, name: str, data: list[typing.Any], **kwargs: dict[str, typing.A Returns result of the updated operation stats. 
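        Raises
        ------
        NotImplementedError
            `update` is not supported by the FAISS implementation.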
""" - if not isinstance(data, list): - raise RuntimeError("Data is not of type list.") - - resource = self.load_resource(name) - - return resource.update(data=data, **kwargs) + raise NotImplementedError("update operation is not supported in FAISS") - def delete_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> typing.Any: + def delete_by_keys(self, name: str, keys: int | str | list, **kwargs) -> typing.Any: """ Delete vectors by keys from the collection. @@ -552,7 +513,7 @@ def delete_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, Name of the collection. keys : int | str | list Primary keys to delete vectors. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -561,11 +522,9 @@ def delete_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, Returns result of the given keys that are delete from the collection. """ - resource = self.load_resource(name) + raise NotImplementedError("delete_by_keys operation is not supported in FAISS") - return resource.delete_by_keys(keys=keys, **kwargs) - - def delete(self, name: str, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def delete(self, name: str, expr: str, **kwargs) -> dict[str, typing.Any]: """ Delete vectors from the collection using expressions. @@ -575,7 +534,7 @@ def delete(self, name: str, expr: str, **kwargs: dict[str, typing.Any]) -> dict[ Name of the collection. expr : str Delete expression. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -584,12 +543,9 @@ def delete(self, name: str, expr: str, **kwargs: dict[str, typing.Any]) -> dict[ Returns result of the given keys that are delete from the collection. """ - resource = self.load_resource(name) - result = resource.delete(expr=expr, **kwargs) - - return result + raise NotImplementedError("delete operation is not supported in FAISS") - def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> list[typing.Any]: + def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs) -> list[typing.Any]: """ Retrieve the inserted vectors using their primary keys from the Collection. @@ -600,7 +556,7 @@ def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str keys : int | str | list Primary keys to get vectors for. Depending on pk_field type it can be int or str or a list of either. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments for the retrieval operation. Returns @@ -609,13 +565,9 @@ def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str Returns result rows of the given keys from the collection. """ - resource = self.load_resource(name) + raise NotImplementedError("retrieve_by_keys operation is not supported in FAISS") - result = resource.retrieve_by_keys(keys=keys, **kwargs) - - return result - - def count(self, name: str, **kwargs: dict[str, typing.Any]) -> int: + def count(self, name: str, **kwargs) -> int: """ Returns number of rows/entities in the given collection. @@ -623,7 +575,7 @@ def count(self, name: str, **kwargs: dict[str, typing.Any]) -> int: ---------- name : str Name of the collection. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments for the count operation. 
Returns @@ -631,47 +583,29 @@ def count(self, name: str, **kwargs: dict[str, typing.Any]) -> int: int Returns number of entities in the collection. """ - resource = self.load_resource(name) - return resource.count(**kwargs) + raise NotImplementedError("count operation is not supported in FAISS") - def drop(self, name: str, **kwargs: dict[str, typing.Any]) -> None: + def drop(self, name: str, **kwargs) -> None: """ - Drop a collection, index, or partition in the Milvus vector database. - - This method allows you to drop a collection, an index within a collection, - or a specific partition within a collection in the Milvus vector database. + Drop a collection. Parameters ---------- name : str Name of the collection, index, or partition to be dropped. - **kwargs : dict + **kwargs Additional keyword arguments for specifying the type and partition name (if applicable). - Notes on Expected Keyword Arguments: - ------------------------------------ - - 'collection' (str, optional): - Specifies the type of collection to drop. Possible values: 'collection' (default), 'index', 'partition'. - - - 'partition_name' (str, optional): - Required when dropping a specific partition within a collection. Specifies the partition name to be dropped. - - - 'field_name' (str, optional): - Required when dropping an index within a collection. Specifies the field name for which the index is created. - - - 'index_name' (str, optional): - Required when dropping an index within a collection. Specifies the name of the index to be dropped. - Raises ------ ValueError If mandatory arguments are missing or if the provided 'collection' value is invalid. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + raise NotImplementedError("drop operation is not supported in FAISS") - def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict: + def describe(self, name: str, **kwargs) -> dict: """ Describe the collection in the vector database. @@ -679,8 +613,8 @@ def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict: ---------- name : str Name of the collection. - **kwargs : dict[str, typing.Any] - Additional keyword arguments specific to the Milvus vector database. + **kwargs + Additional keyword arguments specific to the vector database. Returns ------- @@ -688,9 +622,7 @@ def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict: Returns collection information. """ - resource = self.load_resource(name) - - return resource.describe(**kwargs) + raise NotImplementedError("describe operation is not supported in FAISS") def release_resource(self, name: str) -> None: """ @@ -702,13 +634,10 @@ def release_resource(self, name: str) -> None: Name of the collection to release. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + raise NotImplementedError("release_resource operation is not supported in FAISS") def close(self) -> None: """ - Close the connection to the Milvus vector database. - - This method disconnects from the Milvus vector database by removing the connection. - + Close the vector database service and release all resources. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + raise NotImplementedError("close operation is not supported in FAISS") diff --git a/tests/_utils/faiss.py b/tests/_utils/faiss.py deleted file mode 100644 index d8060477dc..0000000000 --- a/tests/_utils/faiss.py +++ /dev/null @@ -1,35 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utilities for testing Morpheus with FAISS""" -from typing import List - - -class FakeEmbedder: - - def embed_query(self, data: str) -> List[float]: - # setting data to arbitrary float since constant value will always be returned - data = 0.0 - return [float(1.0)] * 1023 + [float(0.0) * data] - - def embed_documents(self, data: list) -> List[List[float]]: - return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] - - async def aembed_query(self, data: str) -> List[float]: - # setting data to arbitrary float since constant value will always be returned - data = 0.0 - return [float(1.0)] * 1023 + [float(0.0) * data] - - async def aembed_documents(self, data: list) -> List[List[float]]: - return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] diff --git a/tests/conftest.py b/tests/conftest.py index 075011589b..ee5181d3bc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,7 +32,6 @@ import requests from _utils import import_or_skip -from _utils.faiss import FakeEmbedder from _utils.kafka import _init_pytest_kafka from _utils.kafka import kafka_bootstrap_servers_fixture # noqa: F401 pylint:disable=unused-import from _utils.kafka import kafka_consumer_fixture # noqa: F401 pylint:disable=unused-import @@ -1021,21 +1020,6 @@ def milvus_server_uri(tmp_path_factory): yield uri -@pytest.fixture(scope="session") -def faiss_test_dir(): - # Get path for FAISS directory - tmp_dir_path = os.environ.get('FAISS_DIR') - if tmp_dir_path is None: - raise ValueError("set FAISS_DIR to directory with FAISS DB") - yield tmp_dir_path - - -@pytest.fixture(scope="session") -def faiss_test_embeddings(): - embeddings = FakeEmbedder() - yield embeddings - - @pytest.fixture(scope="session", name="milvus_data") def milvus_data_fixture(): inital_data = [{"id": i, "embedding": [i / 10.0] * 3, "age": 25 + i} for i in range(10)] diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py index fe0f898eb7..98a428bbe3 100644 --- a/tests/test_faiss_vector_db_service.py +++ b/tests/test_faiss_vector_db_service.py @@ -14,132 +14,126 @@ # See the License for the specific language governing permissions and # limitations under the License. 
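The rewritten test module below replaces the deleted `_utils/faiss.py` helper
with a self-contained `FakeEmbedder` whose output is a deterministic one-hot
vector keyed on text length. A quick illustrative check of that property (using
the `FakeEmbedder` class defined just below):

    vec = FakeEmbedder().embed_query("abc")

    assert len(vec) == 1024
    assert vec[3] == 1.0    # index = len("abc") % 1024
    assert sum(vec) == 1.0  # every other position stays 0.0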
+import os
+import typing
+from pathlib import Path
+
 import pytest
-from langchain_community.docstore.in_memory import InMemoryDocstore
-from langchain_community.vectorstores.faiss import FAISS
-from langchain_core.documents import Document
 
-from _utils.faiss import FakeEmbedder
 from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBResourceService
 from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBService
 
-# create FAISS docstore for testing
-texts = ["for", "the", "test"]
-embeddings = FakeEmbedder()
-ids = ["a", "b", "c"]
-create_store = FAISS.from_texts(texts, embeddings, ids=ids)
-INDEX_NAME = "index"
-TMP_DIR_PATH = "/workspace/.tmp/faiss_test_index"
-create_store.save_local(TMP_DIR_PATH, INDEX_NAME)
+if (typing.TYPE_CHECKING):
+    from langchain_core.embeddings import Embeddings
+else:
+    lc_core_embeddings = pytest.importorskip("langchain_core.embeddings", reason="langchain_core not installed")
+    Embeddings = lc_core_embeddings.Embeddings
+
+
+class FakeEmbedder(Embeddings):
+
+    def embed_query(self, text: str) -> list[float]:
+        # One-hot encoding using length of text
+        vec = [float(0.0)] * 1024
+
+        vec[len(text) % 1024] = 1.0
+
+        return vec
+
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        return [self.embed_query(text) for text in texts]
+
+    async def aembed_query(self, text: str) -> list[float]:
+        return self.embed_query(text)
+
+    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
+        return self.embed_documents(texts)
+
+
+@pytest.fixture(scope="function", name="faiss_simple_store_dir")
+def faiss_simple_store_dir_fixture(tmpdir_path: Path):
+
+    from langchain_community.vectorstores.faiss import FAISS
+
+    embeddings = FakeEmbedder()
+
+    # create FAISS docstore for testing
+    index_store = FAISS.from_texts([str(x) * x for x in range(3)], embeddings, ids=[chr(x + 97) for x in range(3)])
+
+    index_store.save_local(str(tmpdir_path), index_name="index")
+
+    # create a second index for testing
+    other_store = FAISS.from_texts([str(x) * x for x in range(3, 8)],
+                                   embeddings,
+                                   ids=[chr(x + 97) for x in range(3, 8)])
+    other_store.save_local(str(tmpdir_path), index_name="other_index")
+
+    return str(tmpdir_path)
 
 
 @pytest.fixture(scope="function", name="faiss_service")
-def faiss_service_fixture(faiss_test_dir: str, faiss_test_embeddings: list):
-    # Fixture for FAISS service; can edit FAISS docstore instantiated outside fixture if need to change
-    # embedding model, et.
+def faiss_service_fixture(faiss_simple_store_dir: str):
+    # Fixture for the FAISS service; edit the `faiss_simple_store_dir` fixture above if the
+    # embedding model, index contents, etc. need to change.
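+    #
+    # With the one-hot FakeEmbedder, the "index" store holds "", "1" and "22"
+    # (basis vectors e_0, e_1 and e_2), so the similarity-search assertions in
+    # the tests below are exact matches rather than fuzzy nearest neighbours.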
- service = FaissVectorDBService(local_dir=faiss_test_dir, embeddings=faiss_test_embeddings) + service = FaissVectorDBService(local_dir=faiss_simple_store_dir, embeddings=FakeEmbedder()) yield service def test_load_resource(faiss_service: FaissVectorDBService): + + # Check the default implementation resource = faiss_service.load_resource() assert isinstance(resource, FaissVectorDBResourceService) - assert resource._name == "index" - - -def test_count(faiss_service: FaissVectorDBService): - docstore = "index" - count = faiss_service.count(docstore) - assert count == len(faiss_service._local_dir) + # Check specifying a name + resource = faiss_service.load_resource("index") + assert resource.describe()["index_name"] == "index" -def test_insert(faiss_service: FaissVectorDBService): - # Test for inserting embeddings (not docs, texts) into docstore - vector = FakeEmbedder().embed_query(data="hi") - test_data = list(iter([("hi", vector)])) - docstore_name = "index" - response = faiss_service.insert(name=docstore_name, data=test_data) - assert response == {"status": "success"} + # Check another name + resource = faiss_service.load_resource("other_index") + assert resource.describe()["index_name"] == "other_index" -def test_delete(faiss_service: FaissVectorDBService): - # specify name of docstore and ID to delete - docstore_name = "index" - delete_id = "a" - response_delete = faiss_service.delete(name=docstore_name, expr=delete_id) - assert response_delete == {"status": "success"} +def test_describe(faiss_service: FaissVectorDBService): + desc_dict = faiss_service.load_resource().describe() + assert desc_dict["index_name"] == "index" + assert os.path.exists(desc_dict["folder_path"]) + # Room for other properties -async def test_similarity_search(): - index_to_id = create_store.index_to_docstore_id - in_mem_docstore = InMemoryDocstore({ - index_to_id[0]: Document(page_content="for"), - index_to_id[1]: Document(page_content="the"), - index_to_id[2]: Document(page_content="test"), - }) - assert create_store.docstore.__dict__ == in_mem_docstore.__dict__ - - query_vec = await embeddings.aembed_query("for") - output = await create_store.asimilarity_search_by_vector(query_vec, k=1) - - assert output == [Document(page_content="for")] - - -def test_has_store_object(faiss_service: FaissVectorDBService): - # create FAISS docstore to test with - object_store = FAISS.from_texts(texts, embeddings, ids=ids) - object_name = "store_object_index" - object_store.save_local(TMP_DIR_PATH, object_name) +def test_count(faiss_service: FaissVectorDBService): - # attempt to load docstore with given index name - load_attempt = faiss_service.has_store_object(object_name) - assert load_attempt is True + count = faiss_service.load_resource().count() + assert count == 3 - # attempt to load docstore with wrong index name - object_name = "wrong_index_name" - load_attempt = faiss_service.has_store_object(object_name) - assert load_attempt is False +async def test_similarity_search(faiss_service: FaissVectorDBService): -def test_create(faiss_service: FaissVectorDBService): - # Test creating docstore from embeddings - vector = FakeEmbedder().embed_query(data="hi") - test_embedding = list(iter([("hi", vector)])) - docstore_name = "index" - embeddings_docstore = faiss_service.create(name=docstore_name, text_embeddings=test_embedding) + vdb = faiss_service.load_resource() - # save created docstore - index_name_embeddings = "embeddings_index" - embeddings_docstore.save_local(TMP_DIR_PATH, index_name_embeddings) + query_vec = await 
faiss_service.embeddings.aembed_query("22") - # attempt to load created docstore - load_attempt = faiss_service.has_store_object(index_name_embeddings) + k_1 = await vdb.similarity_search(embeddings=[query_vec], k=1) - assert load_attempt is True + assert len(k_1[0]) == 1 + assert k_1[0][0]["page_content"] == "22" - # Test creating docstore from texts - test_texts = ["for", "the", "test"] - texts_docstore = faiss_service.create(name=docstore_name, texts=test_texts) + k_3 = await vdb.similarity_search(embeddings=[query_vec], k=3) - # save created docstore - index_name_texts = "texts_index" - texts_docstore.save_local(TMP_DIR_PATH, index_name_texts) + assert len(k_3[0]) == 3 + assert k_3[0][0]["page_content"] == "22" - # attempt to load created docstore - load_attempt = faiss_service.has_store_object(index_name_texts) + # Exceed the number of documents in the docstore + k_5 = await vdb.similarity_search(embeddings=[query_vec], k=vdb.count() + 2) - assert load_attempt is True + assert len(k_5[0]) == vdb.count() + assert k_5[0][0]["page_content"] == "22" - # Test creating docstore from documents - test_documents = [Document(page_content="This is for the test.")] - docs_docstore = faiss_service.create(name=docstore_name, documents=test_documents) - # save created docstore - index_name_docs = "docs_index" - docs_docstore.save_local(TMP_DIR_PATH, index_name_docs) +def test_has_store_object(faiss_service: FaissVectorDBService): + assert faiss_service.has_store_object("index") - # attempt to load created docstore - load_attempt = faiss_service.has_store_object(index_name_docs) + assert faiss_service.has_store_object("other_index") - assert load_attempt is True + assert not faiss_service.has_store_object("not_an_index") From 564ece33fa38c7b76d122543ce9b37d9a3d4fbec Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 11 Jun 2024 16:54:47 -0700 Subject: [PATCH 10/12] Loosening hotfix restrictions --- conda/environments/all_cuda-121_arch-x86_64.yaml | 5 +++-- conda/environments/examples_cuda-121_arch-x86_64.yaml | 6 +++--- dependencies.yaml | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index 267efb662f..f320d3ac86 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -117,13 +117,14 @@ dependencies: - pip: - --find-links https://data.dgl.ai/wheels-test/repo.html - --find-links https://data.dgl.ai/wheels/cu121/repo.html + - PyMuPDF==1.23.* - PyMuPDF==1.23.21 - databricks-cli < 0.100 - databricks-connect - dgl==2.0.0 - dglgo - - faiss-gpu==1.7 - - google-search-results==2.4 + - faiss-gpu==1.7.* + - google-search-results==2.4.* - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.16 - milvus==2.3.5 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index a56c41c20b..cda5d37df4 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -61,13 +61,13 @@ dependencies: - pip: - --find-links https://data.dgl.ai/wheels-test/repo.html - --find-links https://data.dgl.ai/wheels/cu121/repo.html - - PyMuPDF==1.23.21 + - PyMuPDF==1.23.* - databricks-cli < 0.100 - databricks-connect - dgl==2.0.0 - dglgo - - faiss-gpu==1.7 - - google-search-results==2.4 + - faiss-gpu==1.7.* + - google-search-results==2.4.* - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.16 - 
milvus==2.3.5 diff --git a/dependencies.yaml b/dependencies.yaml index 9011d0b974..8d41be4f50 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -324,12 +324,12 @@ dependencies: - sentence-transformers=2.7 - pip - pip: - - faiss-gpu==1.7 - - google-search-results==2.4 + - faiss-gpu==1.7.* + - google-search-results==2.4.* - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.16 - nemollm==0.3.5 - - PyMuPDF==1.23.21 + - PyMuPDF==1.23.* model-training-tuning: common: From 5611fc0bf2ccf0321046289b7b0d0a438c1a47b3 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Wed, 12 Jun 2024 09:09:57 -0700 Subject: [PATCH 11/12] Fixing OpenAI tests --- morpheus.code-workspace | 2 +- morpheus/llm/services/openai_chat_service.py | 25 ++ tests/llm/services/test_openai_chat_client.py | 151 ------------ .../llm/services/test_openai_chat_service.py | 230 ++++++++++++++++-- 4 files changed, 231 insertions(+), 177 deletions(-) delete mode 100644 tests/llm/services/test_openai_chat_client.py diff --git a/morpheus.code-workspace b/morpheus.code-workspace index f81d904f8e..9e25e5b414 100644 --- a/morpheus.code-workspace +++ b/morpheus.code-workspace @@ -695,7 +695,7 @@ "tests" ], "python.testing.pytestEnabled": true, - "python.testing.unittestEnabled": true, + "python.testing.unittestEnabled": false, "rewrap.wrappingColumn": 120, "testMate.cpp.debug.configTemplate": { "args": "${argsArray}", diff --git a/morpheus/llm/services/openai_chat_service.py b/morpheus/llm/services/openai_chat_service.py index 8fe1919a90..3b2c87b4f2 100644 --- a/morpheus/llm/services/openai_chat_service.py +++ b/morpheus/llm/services/openai_chat_service.py @@ -137,6 +137,31 @@ def __init__(self, api_key=self._parent._api_key.value, base_url=self._parent._base_url.value) + @property + def model_name(self): + """ + Get the name of the model associated with this client. + + Returns + ------- + str + The name of the model. + """ + return self._model_name + + @property + def model_kwargs(self): + """ + Get the keyword args that will be passed to the model when calling generation functions. + + Returns + ------- + dict + The keyword arguments dictionary. + """ + # Return a copy to avoid modification of the original + return self._model_kwargs.copy() + def get_input_names(self) -> list[str]: input_names = [self._prompt_key] if self._set_assistant: diff --git a/tests/llm/services/test_openai_chat_client.py b/tests/llm/services/test_openai_chat_client.py deleted file mode 100644 index 628274f68b..0000000000 --- a/tests/llm/services/test_openai_chat_client.py +++ /dev/null @@ -1,151 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import asyncio -from unittest import mock - -import pytest - -from _utils.llm import mk_mock_openai_response -from morpheus.llm.services.openai_chat_service import OpenAIChatService - - -@pytest.mark.parametrize("api_key", ["12345", None]) -@pytest.mark.parametrize("base_url", ["http://test.openai.com/v1", None]) -@pytest.mark.parametrize("max_retries", [5, 10]) -def test_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], - api_key: str, - base_url: str, - max_retries: int): - OpenAIChatService(api_key=api_key, base_url=base_url).get_client(model_name="test_model", max_retries=max_retries) - - for mock_client in mock_chat_completion: - mock_client.assert_called_once_with(api_key=api_key, base_url=base_url, max_retries=max_retries) - - -@pytest.mark.parametrize("max_retries", [5, 10]) -def test_constructor_default_service_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], - max_retries: int): - OpenAIChatService().get_client(model_name="test_model", max_retries=max_retries) - - for mock_client in mock_chat_completion: - mock_client.assert_called_once_with(max_retries=max_retries, organization=None, api_key=None, base_url=None) - - -@pytest.mark.parametrize("use_async", [True, False]) -@pytest.mark.parametrize( - "input_dict, set_assistant, expected_messages", - [({ - "prompt": "test_prompt", "assistant": "assistant_response" - }, - True, [{ - "role": "user", "content": "test_prompt" - }, { - "role": "assistant", "content": "assistant_response" - }]), ({ - "prompt": "test_prompt" - }, False, [{ - "role": "user", "content": "test_prompt" - }])]) -@pytest.mark.parametrize("temperature", [0, 1, 2]) -def test_generate(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], - use_async: bool, - input_dict: dict[str, str], - set_assistant: bool, - expected_messages: list[dict], - temperature: int): - (mock_client, mock_async_client) = mock_chat_completion - client = OpenAIChatService().get_client(model_name="test_model", - set_assistant=set_assistant, - temperature=temperature) - - if use_async: - results = asyncio.run(client.generate_async(**input_dict)) - mock_async_client.chat.completions.create.assert_called_once_with(model="test_model", - messages=expected_messages, - temperature=temperature) - mock_client.chat.completions.create.assert_not_called() - - else: - results = client.generate(**input_dict) - mock_client.chat.completions.create.assert_called_once_with(model="test_model", - messages=expected_messages, - temperature=temperature) - mock_async_client.chat.completions.create.assert_not_called() - - assert results == "test_output" - - -@pytest.mark.parametrize("use_async", [True, False]) -@pytest.mark.parametrize("inputs, set_assistant, expected_messages", - [({ - "prompt": ["prompt1", "prompt2"], "assistant": ["assistant1", "assistant2"] - }, - True, - [[{ - "role": "user", "content": "prompt1" - }, { - "role": "assistant", "content": "assistant1" - }], [{ - "role": "user", "content": "prompt2" - }, { - "role": "assistant", "content": "assistant2" - }]]), - ({ - "prompt": ["prompt1", "prompt2"] - }, - False, [[{ - "role": "user", "content": "prompt1" - }], [{ - "role": "user", "content": "prompt2" - }]])]) -@pytest.mark.parametrize("temperature", [0, 1, 2]) -def test_generate_batch(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], - use_async: bool, - inputs: dict[str, list[str]], - set_assistant: bool, - expected_messages: list[list[dict]], - temperature: int): - (mock_client, mock_async_client) = mock_chat_completion - 
client = OpenAIChatService().get_client(model_name="test_model", - set_assistant=set_assistant, - temperature=temperature) - - expected_results = ["test_output" for _ in range(len(inputs["prompt"]))] - expected_calls = [ - mock.call(model="test_model", messages=messages, temperature=temperature) for messages in expected_messages - ] - - if use_async: - results = asyncio.run(client.generate_batch_async(inputs)) - mock_async_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) - mock_client.chat.completions.create.assert_not_called() - - else: - results = client.generate_batch(inputs) - mock_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) - mock_async_client.chat.completions.create.assert_not_called() - - assert results == expected_results - - -@pytest.mark.parametrize("completion", [[], [None]], ids=["no_choices", "no_content"]) -@pytest.mark.usefixtures("mock_chat_completion") -def test_extract_completion_errors(completion: list): - client = OpenAIChatService().get_client(model_name="test_model") - mock_completion = mk_mock_openai_response(completion) - - with pytest.raises(ValueError): - client._extract_completion(mock_completion) diff --git a/tests/llm/services/test_openai_chat_service.py b/tests/llm/services/test_openai_chat_service.py index f3adc1023a..54b4290ded 100644 --- a/tests/llm/services/test_openai_chat_service.py +++ b/tests/llm/services/test_openai_chat_service.py @@ -13,50 +13,230 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio +import os from unittest import mock import pytest -from morpheus.llm.services.llm_service import LLMService -from morpheus.llm.services.openai_chat_service import OpenAIChatClient +from _utils.llm import mk_mock_openai_response from morpheus.llm.services.openai_chat_service import OpenAIChatService -def test_constructor(): - service = OpenAIChatService() - assert isinstance(service, LLMService) +@pytest.fixture(name="set_default_openai_api_key", autouse=True, scope="function") +def set_default_openai_api_key_fixture(): + # Must have an API key set to create the openai client + with mock.patch.dict(os.environ, clear=True, values={"OPENAI_API_KEY": "testing_api_key"}): + yield + + +def assert_called_once_with_relaxed(mock_obj, *args, **kwargs): + + if (len(mock_obj.call_args_list) == 1): + + recent_call = mock_obj.call_args_list[-1] + + # Ensure that the number of arguments matches by adding ANY to the back of the args + if (len(args) < len(recent_call.args)): + args = tuple(list(args) + [mock.ANY] * (len(recent_call.args) - len(args))) + + addl_kwargs = {key: mock.ANY for key in recent_call.kwargs.keys() if key not in kwargs} + + kwargs.update(addl_kwargs) + + mock_obj.assert_called_once_with(*args, **kwargs) + + +@pytest.mark.parametrize("api_key", ["12345", None]) +@pytest.mark.parametrize("base_url", ["http://test.openai.com/v1", None]) +@pytest.mark.parametrize("org_id", ["my-org-124", None]) +def test_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], + api_key: str, + base_url: str, + org_id: str): + + OpenAIChatService(api_key=api_key, base_url=base_url, org_id=org_id).get_client(model_name="test_model") + + if (api_key is None): + api_key = os.environ["OPENAI_API_KEY"] + + for mock_client in mock_chat_completion: + assert_called_once_with_relaxed(mock_client, organization=org_id, api_key=api_key, base_url=base_url) + + +@pytest.mark.parametrize("max_retries", [5, 10, -1, None]) +def 
test_max_retries(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], max_retries: int): + OpenAIChatService().get_client(model_name="test_model", max_retries=max_retries) + + for mock_client in mock_chat_completion: + assert_called_once_with_relaxed(mock_client, max_retries=max_retries) + + +@pytest.mark.parametrize("use_json", [True, False]) +def test_client_json(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], use_json: bool): + client = OpenAIChatService().get_client(model_name="test_model", json=use_json) + + # Perform a dummy generate call + client.generate(prompt="test_prompt") + + if (use_json): + assert_called_once_with_relaxed(mock_chat_completion[0].chat.completions.create, + response_format={"type": "json_object"}) + else: + assert mock_chat_completion[0].chat.completions.create.call_args_list[-1].kwargs.get("response_format") is None + + +@pytest.mark.parametrize("set_assistant", [True, False]) +def test_client_set_assistant(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], set_assistant: bool): + client = OpenAIChatService().get_client(model_name="test_model", set_assistant=set_assistant) + + # Perform a dummy generate call + client.generate(prompt="test_prompt", assistant="assistant_message") + + messages = mock_chat_completion[0].chat.completions.create.call_args_list[-1].kwargs["messages"] + + found_assistant = False + + for message in messages: + if (message.get("role") == "assistant"): + found_assistant = True + break + + assert found_assistant == set_assistant + + +@pytest.mark.parametrize("use_async", [True, False]) +@pytest.mark.parametrize( + "input_dict, set_assistant, expected_messages", + [({ + "prompt": "test_prompt", "assistant": "assistant_response" + }, + True, [{ + "role": "user", "content": "test_prompt" + }, { + "role": "assistant", "content": "assistant_response" + }]), ({ + "prompt": "test_prompt" + }, False, [{ + "role": "user", "content": "test_prompt" + }])]) +@pytest.mark.parametrize("temperature", [0, 1, 2]) +def test_generate(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], + use_async: bool, + input_dict: dict[str, str], + set_assistant: bool, + expected_messages: list[dict], + temperature: int): + (mock_client, mock_async_client) = mock_chat_completion + client = OpenAIChatService().get_client(model_name="test_model", + set_assistant=set_assistant, + temperature=temperature) + + if use_async: + results = asyncio.run(client.generate_async(**input_dict)) + mock_async_client.chat.completions.create.assert_called_once_with(model="test_model", + messages=expected_messages, + temperature=temperature) + mock_client.chat.completions.create.assert_not_called() + + else: + results = client.generate(**input_dict) + mock_client.chat.completions.create.assert_called_once_with(model="test_model", + messages=expected_messages, + temperature=temperature) + mock_async_client.chat.completions.create.assert_not_called() + + assert results == "test_output" + + +@pytest.mark.parametrize("use_async", [True, False]) +@pytest.mark.parametrize("inputs, set_assistant, expected_messages", + [({ + "prompt": ["prompt1", "prompt2"], "assistant": ["assistant1", "assistant2"] + }, + True, + [[{ + "role": "user", "content": "prompt1" + }, { + "role": "assistant", "content": "assistant1" + }], [{ + "role": "user", "content": "prompt2" + }, { + "role": "assistant", "content": "assistant2" + }]]), + ({ + "prompt": ["prompt1", "prompt2"] + }, + False, [[{ + "role": "user", "content": "prompt1" + }], [{ + "role": "user", "content": "prompt2" 
+ }]])]) +@pytest.mark.parametrize("temperature", [0, 1, 2]) +def test_generate_batch(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], + use_async: bool, + inputs: dict[str, list[str]], + set_assistant: bool, + expected_messages: list[list[dict]], + temperature: int): + (mock_client, mock_async_client) = mock_chat_completion + client = OpenAIChatService().get_client(model_name="test_model", + set_assistant=set_assistant, + temperature=temperature) + + expected_results = ["test_output" for _ in range(len(inputs["prompt"]))] + expected_calls = [ + mock.call(model="test_model", messages=messages, temperature=temperature) for messages in expected_messages + ] + + if use_async: + results = asyncio.run(client.generate_batch_async(inputs)) + mock_async_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) + mock_client.chat.completions.create.assert_not_called() + + else: + results = client.generate_batch(inputs) + mock_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) + mock_async_client.chat.completions.create.assert_not_called() + + assert results == expected_results + + +@pytest.mark.parametrize("completion", [[], [None]], ids=["no_choices", "no_content"]) +@pytest.mark.usefixtures("mock_chat_completion") +def test_extract_completion_errors(completion: list): + client = OpenAIChatService().get_client(model_name="test_model") + mock_completion = mk_mock_openai_response(completion) + + with pytest.raises(ValueError): + client._extract_completion(mock_completion) def test_get_client(): service = OpenAIChatService() client = service.get_client(model_name="test_model") - assert isinstance(client, OpenAIChatClient) + assert client.model_name == "test_model" + + client = service.get_client(model_name="test_model2", extra_arg="test_arg") + + assert client.model_name == "test_model2" + assert client.model_kwargs == {"extra_arg": "test_arg"} -@pytest.mark.parametrize("use_json", [True, False]) -@pytest.mark.parametrize("set_assistant", [True, False]) @pytest.mark.parametrize("temperature", [0, 1, 2]) @pytest.mark.parametrize("max_retries", [5, 10]) -@mock.patch("morpheus.llm.services.openai_chat_service.OpenAIChatClient") -def test_get_client_passed_args(mock_client: mock.MagicMock, - set_assistant: bool, - use_json: bool, +def test_get_client_passed_args(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], temperature: int, max_retries: int): service = OpenAIChatService() - service.get_client(model_name="test_model", - set_assistant=set_assistant, - json=use_json, - temperature=temperature, - test='this', - max_retries=max_retries) + client = service.get_client(model_name="test_model", temperature=temperature, test='this', max_retries=max_retries) + + # Perform a dummy generate call + client.generate(prompt="test_prompt") # Ensure the get_client method passed on the set_assistant and model kwargs - mock_client.assert_called_once_with(service, - model_name="test_model", - set_assistant=set_assistant, - json=use_json, - temperature=temperature, - test='this', - max_retries=max_retries) + assert_called_once_with_relaxed(mock_chat_completion[0].chat.completions.create, + model="test_model", + temperature=temperature, + test='this') From aeb05446d7651dd8fa459699d52a1657dac2b47c Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Wed, 12 Jun 2024 09:49:26 -0700 Subject: [PATCH 12/12] Fixing tests --- .../llm/services/nvfoundation_llm_service.py | 5 ++- tests/llm/services/test_llm_service_pipe.py | 33 ++++++++------ ...nt.py => 
test_nvfoundation_llm_service.py} | 44 +++++++++--------- tests/llm/test_completion_pipe.py | 45 +++++++++---------- 4 files changed, 65 insertions(+), 62 deletions(-) rename tests/llm/services/{test_nvfoundation_llm_client.py => test_nvfoundation_llm_service.py} (75%) diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index 5154158538..62bc355662 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -207,7 +207,6 @@ class NVFoundationLLMService(LLMService): class APIKey(EnvConfigValue): _ENV_KEY: str = "NVIDIA_API_KEY" - _ALLOW_NONE: bool = True class BaseURL(EnvConfigValue): _ENV_KEY: str = "NVIDIA_API_BASE" @@ -236,6 +235,10 @@ def _merge_model_kwargs(self, model_kwargs: dict) -> dict: def api_key(self): return self._api_key.value + @property + def base_url(self): + return self._base_url.value + def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClient: """ Returns a client for interacting with a specific model. This method is the preferred way to create a client. diff --git a/tests/llm/services/test_llm_service_pipe.py b/tests/llm/services/test_llm_service_pipe.py index e6e2f8bbf3..13fb5f652e 100644 --- a/tests/llm/services/test_llm_service_pipe.py +++ b/tests/llm/services/test_llm_service_pipe.py @@ -18,12 +18,13 @@ import cudf from _utils import assert_results +from _utils.environment import set_env from _utils.llm import mk_mock_openai_response from morpheus.config import Config from morpheus.llm import LLMEngine from morpheus.llm.nodes.extracter_node import ExtracterNode from morpheus.llm.nodes.llm_generate_node import LLMGenerateNode -from morpheus.llm.services.llm_service import LLMService +from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.nemo_llm_service import NeMoLLMService from morpheus.llm.services.openai_chat_service import OpenAIChatService from morpheus.llm.task_handlers.simple_task_handler import SimpleTaskHandler @@ -35,22 +36,17 @@ from morpheus.stages.preprocess.deserialize_stage import DeserializeStage -def _build_engine(llm_service_cls: type[LLMService]): - llm_service = llm_service_cls() - llm_clinet = llm_service.get_client(model_name="test_model") +def _build_engine(llm_client: LLMClient): engine = LLMEngine() engine.add_node("extracter", node=ExtracterNode()) - engine.add_node("completion", inputs=["/extracter"], node=LLMGenerateNode(llm_client=llm_clinet)) + engine.add_node("completion", inputs=["/extracter"], node=LLMGenerateNode(llm_client=llm_client)) engine.add_task_handler(inputs=["/completion"], handler=SimpleTaskHandler()) return engine -def _run_pipeline(config: Config, - llm_service_cls: type[LLMService], - country_prompts: list[str], - capital_responses: list[str]): +def _run_pipeline(config: Config, llm_client: LLMClient, country_prompts: list[str], capital_responses: list[str]): """ Loosely patterned after `examples/llm/completion` """ @@ -66,7 +62,7 @@ def _run_pipeline(config: Config, pipe.add_stage( DeserializeStage(config, message_type=ControlMessage, task_type="llm_engine", task_payload=completion_task)) - pipe.add_stage(LLMEngineStage(config, engine=_build_engine(llm_service_cls))) + pipe.add_stage(LLMEngineStage(config, engine=_build_engine(llm_client))) sink = pipe.add_stage(CompareDataFrameStage(config, compare_df=expected_df)) pipe.run() @@ -79,7 +75,13 @@ def test_completion_pipe_nemo(config: Config, country_prompts: list[str], capital_responses: 
list[str]): mock_nemollm.post_process_generate_response.side_effect = [{"text": response} for response in capital_responses] - _run_pipeline(config, NeMoLLMService, country_prompts, capital_responses) + + # Set a dummy key to bypass the API key check + with set_env(NGC_API_KEY="test"): + + llm_client = NeMoLLMService().get_client(model_name="test_model") + + _run_pipeline(config, llm_client, country_prompts, capital_responses) def test_completion_pipe_openai(config: Config, @@ -91,7 +93,10 @@ def test_completion_pipe_openai(config: Config, mk_mock_openai_response([response]) for response in capital_responses ] - _run_pipeline(config, OpenAIChatService, country_prompts, capital_responses) + with set_env(OPENAI_API_KEY="test"): + llm_client = OpenAIChatService().get_client(model_name="test_model") + + _run_pipeline(config, llm_client, country_prompts, capital_responses) - mock_client.chat.completions.create.assert_not_called() - mock_async_client.chat.completions.create.assert_called() + mock_client.chat.completions.create.assert_not_called() + mock_async_client.chat.completions.create.assert_called() diff --git a/tests/llm/services/test_nvfoundation_llm_client.py b/tests/llm/services/test_nvfoundation_llm_service.py similarity index 75% rename from tests/llm/services/test_nvfoundation_llm_client.py rename to tests/llm/services/test_nvfoundation_llm_service.py index dc02c1836a..dec76060e8 100644 --- a/tests/llm/services/test_nvfoundation_llm_client.py +++ b/tests/llm/services/test_nvfoundation_llm_service.py @@ -25,23 +25,24 @@ from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMService -@pytest.mark.usefixtures("restore_environ") -@pytest.mark.parametrize("api_key", [None, "test_api_key"]) -@pytest.mark.parametrize("set_env", [True, False]) -def test_constructor(api_key: str, set_env: bool): - """ - Test that the constructor prefers explicit arguments over environment variables. 
- """ - env_api_key = "test_env_api_key" +@pytest.fixture(name="set_default_nvidia_api_key", autouse=True, scope="function") +def set_default_nvidia_api_key_fixture(): + # Must have an API key set to create the openai client + with mock.patch.dict(os.environ, clear=True, values={"NVIDIA_API_KEY": "nvapi-testing_api_key"}): + yield - if set_env: - os.environ["NVIDIA_API_KEY"] = env_api_key - service = NVFoundationLLMService(api_key=api_key) +@pytest.mark.parametrize("api_key", ["nvapi-12345", None]) +@pytest.mark.parametrize("base_url", ["http://test.nvidia.com/v1", None]) +def test_constructor(api_key: str, base_url: bool): - expected_api_key = api_key if "NVIDIA_API_KEY" not in os.environ else env_api_key + service = NVFoundationLLMService(api_key=api_key, base_url=base_url) - assert service.api_key == expected_api_key + if (api_key is None): + api_key = os.environ["NVIDIA_API_KEY"] + + assert service.api_key == api_key + assert service.base_url == base_url def test_get_client(): @@ -61,7 +62,7 @@ def test_model_kwargs(): def test_get_input_names(): - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model", additional_arg="test_arg") + client = NVFoundationLLMService().get_client(model_name="test_model", additional_arg="test_arg") assert client.get_input_names() == ["prompt"] @@ -76,7 +77,7 @@ def mock_generation_side_effect(*_, **kwargs): mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") assert client.generate(prompt="test_prompt") == "test_prompt" @@ -90,7 +91,7 @@ def mock_generation_side_effect(*_, **kwargs): mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") assert client.generate_batch({'prompt': ["prompt1", "prompt2"]}) == ["prompt1", "prompt2"] @@ -105,7 +106,7 @@ def mock_generation_side_effect(*_, **kwargs): mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") assert await client.generate_async(prompt="test_prompt") == "test_prompt" @@ -120,7 +121,7 @@ def mock_generation_side_effect(*_, **kwargs): mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") assert await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}) @@ -129,12 +130,11 @@ async def test_generate_batch_async_error(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: def mock_generation_side_effect(*_, **kwargs): - return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] - for x in kwargs["prompts"]]) + raise RuntimeError("unittest") mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") with pytest.raises(RuntimeError, match="unittest"): - await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}) + 
await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}, return_exceptions=False) diff --git a/tests/llm/test_completion_pipe.py b/tests/llm/test_completion_pipe.py index 106eb39586..e57e36f09f 100644 --- a/tests/llm/test_completion_pipe.py +++ b/tests/llm/test_completion_pipe.py @@ -28,7 +28,7 @@ from morpheus.llm.nodes.extracter_node import ExtracterNode from morpheus.llm.nodes.llm_generate_node import LLMGenerateNode from morpheus.llm.nodes.prompt_template_node import PromptTemplateNode -from morpheus.llm.services.llm_service import LLMService +from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.nemo_llm_service import NeMoLLMService from morpheus.llm.services.openai_chat_service import OpenAIChatService from morpheus.llm.task_handlers.simple_task_handler import SimpleTaskHandler @@ -42,9 +42,7 @@ logger = logging.getLogger(__name__) -def _build_engine(llm_service_cls: type[LLMService], model_name: str = "test_model"): - llm_service = llm_service_cls() - llm_client = llm_service.get_client(model_name=model_name) +def _build_engine(llm_client: LLMClient): engine = LLMEngine() engine.add_node("extracter", node=ExtracterNode()) @@ -57,11 +55,7 @@ def _build_engine(llm_service_cls: type[LLMService], model_name: str = "test_mod return engine -def _run_pipeline(config: Config, - llm_service_cls: type[LLMService], - countries: list[str], - capital_responses: list[str], - model_name: str = "test_model") -> dict: +def _run_pipeline(config: Config, llm_client: LLMClient, countries: list[str], capital_responses: list[str]) -> dict: """ Loosely patterned after `examples/llm/completion` """ @@ -81,7 +75,7 @@ def _run_pipeline(config: Config, task_type="llm_engine", task_payload=completion_task)) - pipe.add_stage(LLMEngineStage(config, engine=_build_engine(llm_service_cls, model_name=model_name))) + pipe.add_stage(LLMEngineStage(config, engine=_build_engine(llm_client))) sink = pipe.add_stage(CompareDataFrameStage(config, compare_df=expected_df)) @@ -99,8 +93,10 @@ def test_completion_pipe_nemo(config: Config, # Set a dummy key to bypass the API key check with set_env(NGC_API_KEY="test"): + llm_client = NeMoLLMService().get_client(model_name="test_model") + mock_nemollm.post_process_generate_response.side_effect = [{"text": response} for response in capital_responses] - results = _run_pipeline(config, NeMoLLMService, countries=countries, capital_responses=capital_responses) + results = _run_pipeline(config, llm_client, countries=countries, capital_responses=capital_responses) assert_results(results) @@ -114,20 +110,21 @@ def test_completion_pipe_openai(config: Config, mk_mock_openai_response([response]) for response in capital_responses ] - results = _run_pipeline(config, OpenAIChatService, countries=countries, capital_responses=capital_responses) - assert_results(results) - mock_client.chat.completions.create.assert_not_called() - mock_async_client.chat.completions.create.assert_called() + with set_env(OPENAI_API_KEY="test"): + llm_client = OpenAIChatService().get_client(model_name="test_model") + + results = _run_pipeline(config, llm_client, countries=countries, capital_responses=capital_responses) + assert_results(results) + mock_client.chat.completions.create.assert_not_called() + mock_async_client.chat.completions.create.assert_called() @pytest.mark.usefixtures("nemollm") @pytest.mark.usefixtures("ngc_api_key") def test_completion_pipe_integration_nemo(config: Config, countries: list[str], capital_responses: list[str]): - results = 
_run_pipeline(config,
-                             NeMoLLMService,
-                             countries=countries,
-                             capital_responses=capital_responses,
-                             model_name="gpt-43b-002")
+    llm_client = NeMoLLMService().get_client(model_name="gpt-43b-002")
+
+    results = _run_pipeline(config, llm_client, countries=countries, capital_responses=capital_responses)
     assert results['diff_cols'] == 0
     assert results['total_rows'] == len(countries)
     assert results['matching_rows'] + results['diff_rows'] == len(countries)
@@ -136,11 +133,9 @@ def test_completion_pipe_integration_nemo(config: Config, countries: list[str],
 @pytest.mark.usefixtures("openai")
 @pytest.mark.usefixtures("openai_api_key")
 def test_completion_pipe_integration_openai(config: Config, countries: list[str], capital_responses: list[str]):
-    results = _run_pipeline(config,
-                            OpenAIChatService,
-                            countries=countries,
-                            capital_responses=capital_responses,
-                            model_name="gpt-3.5-turbo")
+    llm_client = OpenAIChatService().get_client(model_name="gpt-3.5-turbo")
+
+    results = _run_pipeline(config, llm_client, countries=countries, capital_responses=capital_responses)
     assert results['diff_cols'] == 0
     assert results['total_rows'] == len(countries)
     assert results['matching_rows'] + results['diff_rows'] == len(countries)
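The pattern these last three patches converge on: a service object owns the credentials and defaults, and `get_client()` binds a model name plus per-client keyword arguments. A minimal sketch of that flow, assuming `morpheus` and the `openai` package are installed, with a dummy key in the style of the test fixtures above:

    import os

    from morpheus.llm.services.openai_chat_service import OpenAIChatService

    # A dummy key is enough to construct the client; no request is sent here.
    os.environ["OPENAI_API_KEY"] = "test"

    service = OpenAIChatService()
    client = service.get_client(model_name="test_model2", extra_arg="test_arg")

    # The read-only properties added in patch 11 expose the bound configuration.
    assert client.model_name == "test_model2"
    assert client.model_kwargs == {"extra_arg": "test_arg"}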