From 81dfc1a89fcc501e158568204fb48431a18c50c7 Mon Sep 17 00:00:00 2001
From: Ana Caklovic
Date: Mon, 20 May 2024 11:14:45 -0700
Subject: [PATCH 01/12] tests for services

---
 morpheus.code-workspace                       |   6 +-
 .../llm/services/nvfoundation_llm_service.py  |  35 ++--
 morpheus/service/vdb/faiss_vdb_service.py     |  86 +++++-----
 tests/conftest.py                             |  36 ++++-
 .../services/test_nvfoundation_llm_client.py  | 142 ++++++++++++++++
 tests/test_faiss_vector_db_service.py         | 152 ++++++++++++++++++
 6 files changed, 401 insertions(+), 56 deletions(-)
 create mode 100644 tests/llm/services/test_nvfoundation_llm_client.py
 create mode 100644 tests/test_faiss_vector_db_service.py

diff --git a/morpheus.code-workspace b/morpheus.code-workspace
index cbeadce076..f8886b8ac6 100644
--- a/morpheus.code-workspace
+++ b/morpheus.code-workspace
@@ -27,6 +27,7 @@
         "launch": {
             "compounds": [],
             "configurations": [
+
                 {
                     "args": [
                         "--log_level=DEBUG",
@@ -694,7 +695,7 @@
             "tests"
         ],
         "python.testing.pytestEnabled": true,
-        "python.testing.unittestEnabled": false,
+        "python.testing.unittestEnabled": true,
         "rewrap.wrappingColumn": 120,
         "testMate.cpp.test.advancedExecutables": [
             {
@@ -730,6 +731,7 @@
         },
         "yapf.args": [
             "--style=${workspaceFolder}/setup.cfg"
-        ]
+        ],
+        "python.analysis.inlayHints.pytestParameters": true
     }
 }
diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py
index 9be5130bf9..bde0cfd4b8 100644
--- a/morpheus/llm/services/nvfoundation_llm_service.py
+++ b/morpheus/llm/services/nvfoundation_llm_service.py
@@ -42,7 +42,7 @@ class NVFoundationLLMClient(LLMClient):
     `NeMoLLMService.get_client` method.
     Parameters
     ----------
-    parent : NeMoLLMService
+    parent : NVFoundationLLMService
        The parent service for this client.
     model_name : str
        The name of the model to interact with.
@@ -63,7 +63,10 @@ def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model
         self._model_kwargs = model_kwargs
         self._prompt_key = "prompt"

-        self._client = ChatNVIDIA(client=self._parent._nve_client, model=model_name, **model_kwargs)
+        self._client = ChatNVIDIA(api_key=self._parent._api_key,
+                                  base_url=self._parent._base_url,
+                                  model=model_name,
+                                  **model_kwargs)  # type: ignore

     def get_input_names(self) -> list[str]:
         schema = self._client.get_input_schema()
@@ -144,24 +147,30 @@ class NVFoundationLLMService(LLMService):
         variable.
If neither are present `https://api.nvcf.nvidia.com/v2` will be used., by default None """ - def __init__(self, *, api_key: str = None, base_url: str = None) -> None: + def __init__(self, *, api_key: str = None, base_url: str = None, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() - self._api_key = api_key if base_url is None: - self._base_url = os.getenv('NVIDIA_API_BASE', 'https://api.nvcf.nvidia.com/v2') + self._base_url = os.getenv('NVIDIA_API_BASE', "https://api.nvcf.nvidia.com/v2/nvcf") else: self._base_url = base_url - self._nve_client = NVEModel( - nvidia_api_key=self._api_key, - fetch_url_format=f"{self._base_url}/nvcf/pexec/status/", - call_invoke_base=f"{self._base_url}/nvcf/pexec/functions", - func_list_format=f"{self._base_url}/nvcf/functions", - ) # type: ignore + if "NVIDIA_API_KEY" in os.environ: + self._api_key = os.getenv('NVIDIA_API_KEY') + else: + self._api_key = api_key + + self._default_model_kwargs = model_kwargs + + def _merge_model_kwargs(self, model_kwargs: dict) -> dict: + return {**self._default_model_kwargs, **model_kwargs} + + @property + def api_key(self): + return self._api_key def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClient: """ @@ -174,4 +183,6 @@ def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClien Additional keyword arguments to pass to the model when generating text. """ - return NVFoundationLLMClient(self, model_name=model_name, **model_kwargs) + final_model_kwargs = self._merge_model_kwargs(model_kwargs) + + return NVFoundationLLMClient(self, model_name=model_name, **final_model_kwargs) diff --git a/morpheus/service/vdb/faiss_vdb_service.py b/morpheus/service/vdb/faiss_vdb_service.py index 81f63aef5b..30f6d7d766 100644 --- a/morpheus/service/vdb/faiss_vdb_service.py +++ b/morpheus/service/vdb/faiss_vdb_service.py @@ -21,7 +21,10 @@ import typing from functools import wraps +import numpy as np import pandas as pd +from langchain.docstore.document import Document +from langchain_community.vectorstores import FAISS import cudf @@ -81,7 +84,9 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) dict Returns response content as a dictionary. """ - raise NotImplementedError("Insert operation is not supported in FAISS") + self._index.add_embeddings(data) + return {"status": "success"} + #return list_of_ids def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs: dict[str, typing.Any]) -> dict: """ @@ -149,7 +154,7 @@ async def similarity_search(self, k: int = 4, **kwargs: dict[str, typing.Any]) -> list[list[dict]]: """ - Perform a similarity search within the collection. + Perform a similarity search within the FAISS docstore (asimilarity_search_by_vector returns docs most similar to embedding vector asynchronously). Parameters ---------- @@ -211,7 +216,7 @@ def delete_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any] def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: """ - Delete vectors from the collection using expressions. + Delete vectors by giving a list of IDs. Parameters ---------- @@ -225,7 +230,8 @@ def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing dict[str, typing.Any] Returns result of the given keys that are deleted from the collection. 
""" - raise NotImplementedError("Delete operation is not supported in FAISS") + self._index.delete(expr) + return {"status": "success"} def retrieve_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> list[typing.Any]: """ @@ -260,7 +266,9 @@ def count(self, **kwargs: dict[str, typing.Any]) -> int: int Returns number of entities in the collection. """ - raise NotImplementedError("Count operation is not supported in FAISS") + docstore = self._parent._local_dir + count = len(docstore) + return count def drop(self, **kwargs: dict[str, typing.Any]) -> None: """ @@ -311,19 +319,28 @@ def load_resource(self, name: str = "index", **kwargs: dict[str, typing.Any]) -> def has_store_object(self, name: str) -> bool: """ - Check if a collection exists in the Milvus vector database. + Check if specific index file name exists by attempting to load FAISS index, docstore, and index_to_docstore_id from disk with the index file name. Parameters ---------- name : str - Name of the collection to check. + Name of the FAISS index file to check. Returns ------- bool - True if the collection exists, False otherwise. - """ - return self._client.has_collection(collection_name=name) + True if the file exists, False otherwise. + """ + try: + FAISS.load_local(folder_path=self._local_dir, + embeddings=self._embeddings, + index_name=name, + allow_dangerous_deserialization=True) + return True + except Exception as e: + print(f"Failed to load FAISS with the given index file name: {e}") + # Return False if given index file name cannot be loaded + return False def list_store_objects(self, **kwargs: dict[str, typing.Any]) -> list[str]: """ @@ -362,42 +379,28 @@ def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing. ValueError If the provided schema fields configuration is empty. """ - logger.debug("Creating collection: %s, overwrite=%s, kwargs=%s", name, overwrite, kwargs) - - # Preserve original configuration. 
- collection_conf = copy.deepcopy(kwargs) + # can create with: from_embeddings, from_texts, or from_documents - auto_id = collection_conf.get("auto_id", False) - index_conf = collection_conf.get("index_conf", None) - partition_conf = collection_conf.get("partition_conf", None) - - schema_conf = collection_conf.get("schema_conf") - schema_fields_conf = schema_conf.pop("schema_fields") - - if not self.has_store_object(name) or overwrite: - if overwrite and self.has_store_object(name): - self.drop(name) - - if len(schema_fields_conf) == 0: - raise ValueError("Cannot create collection as provided empty schema_fields configuration") + resource = self.load_resource(name) - schema_fields = [FieldSchemaEncoder.from_dict(field_conf) for field_conf in schema_fields_conf] + if "documents" in kwargs: + documents = kwargs["documents"] + return resource._index.from_documents(documents, self._embeddings) - schema = pymilvus.CollectionSchema(fields=schema_fields, **schema_conf) + elif "text_embeddings" in kwargs: + text_embeddings = kwargs["text_embeddings"] + metadatas = kwargs.get("metadatas") + ids = kwargs.get("ids") + return resource._index.from_embeddings(text_embeddings, self._embeddings, metadatas, ids) - self._client.create_collection_with_schema(collection_name=name, - schema=schema, - index_params=index_conf, - auto_id=auto_id, - shards_num=collection_conf.get("shards", 2), - consistency_level=collection_conf.get( - "consistency_level", "Strong")) + elif "texts" in kwargs: + texts = kwargs["texts"] + metadatas = kwargs.get("metadatas") + ids = kwargs.get("ids") + return resource._index.from_texts(texts, self._embeddings, metadatas, ids) - if partition_conf: - timeout = partition_conf.get("timeout", 1.0) - # Iterate over each partition configuration - for part in partition_conf["partitions"]: - self._client.create_partition(collection_name=name, partition_name=part["name"], timeout=timeout) + else: + raise ValueError("You must provide documents, texts, or text_embeddings along with embeddings in kwargs.") def create_from_dataframe(self, name: str, @@ -468,6 +471,7 @@ def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, """ resource = self.load_resource(name) + return resource.insert(data, **kwargs) def insert_dataframe(self, diff --git a/tests/conftest.py b/tests/conftest.py index 0a33fa7891..2fe3f923e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,10 +25,13 @@ import types import typing import warnings +from pathlib import Path from unittest import mock import pytest import requests +from langchain_community.vectorstores import FAISS # added +from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings # added from _utils import import_or_skip from _utils.kafka import _init_pytest_kafka @@ -483,7 +486,7 @@ def seed_fn(seed=42): @pytest.fixture(scope="function") -def chdir_tmpdir(request: pytest.FixtureRequest, tmp_path): +def chdir_tmpdir(request: pytest.FixtureRequest, tmp_path: Path): """ Executes a test in the tmp_path directory """ @@ -1008,6 +1011,25 @@ def milvus_server_uri(tmp_path_factory): yield uri +@pytest.fixture(scope="session") +def faiss_test_dir(): + # Get oath for FAISS directory + tmp_dir_path = os.environ.get('FAISS_DIR') + if tmp_dir_path is None: + raise ValueError("set FAISS_DIR to directory with FAISS DB") + + # Can change embedding model + embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") + tmp_dir = FAISS.load_local(tmp_dir_path, embeddings=embeddings, allow_dangerous_deserialization=True) + yield tmp_dir + + 
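+# The `faiss_test_dir` fixture above assumes FAISS_DIR points at a docstore
+# previously written with FAISS.save_local(). A sketch of that setup (the path
+# and embedding model here are illustrative only):
+#   store = FAISS.from_texts(["for", "the", "test"], NVIDIAEmbeddings(model="nvolveqa_40k"))
+#   store.save_local("/workspace/.tmp/faiss_test_index", "index")
+# Export FAISS_DIR=/workspace/.tmp/faiss_test_index before running pytest.
+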
+@pytest.fixture(scope="session") +def test_embeddings(): + embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") + yield embeddings + + @pytest.fixture(scope="session", name="milvus_data") def milvus_data_fixture(): inital_data = [{"id": i, "embedding": [i / 10.0] * 3, "age": 25 + i} for i in range(10)] @@ -1037,6 +1059,18 @@ def nemollm_fixture(fail_missing: bool): yield import_or_skip("nemollm", reason=skip_reason, fail_missing=fail_missing) +@pytest.fixture(name="nvfoundationllm", scope='session') +def nvfoundationllm_fixture(fail_missing: bool): + """ + Fixture to ensure nvfoundationllm is installed + """ + skip_reason = ( + "Tests for NVFoundation require the langchain-nvidia-ai-endpoints package to be installed, to install this run:\n" + "`conda env update --solver=libmamba -n morpheus " + "--file conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") + yield import_or_skip("langchain_nvidia_ai_endpoints", reason=skip_reason, fail_missing=fail_missing) + + @pytest.fixture(name="openai", scope='session') def openai_fixture(fail_missing: bool): """ diff --git a/tests/llm/services/test_nvfoundation_llm_client.py b/tests/llm/services/test_nvfoundation_llm_client.py new file mode 100644 index 0000000000..72218d2880 --- /dev/null +++ b/tests/llm/services/test_nvfoundation_llm_client.py @@ -0,0 +1,142 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from unittest import mock + +import pytest +from langchain_core.messages import BaseMessage +from langchain_core.messages import ChatMessage +from langchain_core.outputs import ChatGeneration +from langchain_core.outputs import LLMResult + +from morpheus.llm.services.llm_service import LLMClient +from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMClient +from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMService + + +@pytest.mark.usefixtures("restore_environ") +@pytest.mark.parametrize("api_key", [None, "test_api_key"]) +@pytest.mark.parametrize("set_env", [True, False]) +def test_constructor(mock_nvfoundationllm: mock.MagicMock, api_key: str, set_env: bool): + """ + Test that the constructor prefers explicit arguments over environment variables. 
+ """ + env_api_key = "test_env_api_key" + + if set_env: + os.environ["NVIDIA_API_KEY"] = env_api_key + + service = NVFoundationLLMService(api_key=api_key) + + expected_api_key = api_key if "NVIDIA_API_KEY" not in os.environ else env_api_key + + assert service.api_key == expected_api_key + + +def test_get_client(): + service = NVFoundationLLMService(api_key="test_api_key") + client = service.get_client(model_name="test_model") + + assert isinstance(client, NVFoundationLLMClient) + + +def test_model_kwargs(): + service = NVFoundationLLMService(arg1="default_value1", arg2="default_value2") + + client = service.get_client(model_name="model_name", arg2="value2") + + assert client.model_kwargs["arg1"] == "default_value1" + assert client.model_kwargs["arg2"] == "value2" + + +def test_get_input_names(): + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model", additional_arg="test_arg") + + assert client.get_input_names() == ["prompt"] + + +def test_generate(): + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.generate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return LLMResult(generations=[[ + ChatGeneration(message=ChatMessage(content=x.text, role="assistant")) for x in kwargs["prompts"] + ]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + assert client.generate(prompt="test_prompt") == "test_prompt" + + +def test_generate_batch(): + + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.generate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] + for x in kwargs["prompts"]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + + assert client.generate_batch({'prompt': ["prompt1", "prompt2"]}) == ["prompt1", "prompt2"] + + +async def test_generate_async(): + + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] + for x in kwargs["prompts"]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + + assert await client.generate_async(prompt="test_prompt") == "test_prompt" + + +async def test_generate_batch_async(): + + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] + for x in kwargs["prompts"]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + + assert await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}) + + +async def test_generate_batch_async_error(): + with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: + + def mock_generation_side_effect(*args, **kwargs): + return 
LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] + for x in kwargs["prompts"]]) + + mock_nvfoundationllm.side_effect = mock_generation_side_effect + + client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + + with pytest.raises(RuntimeError, match="unittest"): + await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}) diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py new file mode 100644 index 0000000000..9217d5f7f8 --- /dev/null +++ b/tests/test_faiss_vector_db_service.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pathlib import Path +from typing import Union + +import numpy as np +import pytest +from langchain_community.docstore.in_memory import InMemoryDocstore +from langchain_community.vectorstores import FAISS +from langchain_core.documents import Document +from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings + +import cudf + +from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBResourceService +from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBService + +# create FAISS docstore for testing +texts = ["for", "the", "test"] +embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") +ids = ["a", "b", "c"] +create_store = FAISS.from_texts(texts, embeddings, ids=ids) +index_name = "index" +tmp_dir_path = "/workspace/.tmp/faiss_test_index" +create_store.save_local(tmp_dir_path, index_name) +faiss_service = FaissVectorDBService(local_dir=tmp_dir_path, embeddings=embeddings) + + +@pytest.fixture(scope="module", name="faiss_service") +def faiss_service_fixture(faiss_test_dir: str, test_embeddings: list): + # Fixture for FAISS service; can edit FAISS docstore instantiated outside fixture if need to change + # embedding model, et. 
+ service = FaissVectorDBService(local_dir=faiss_test_dir, embeddings=test_embeddings) + yield service + + +def test_load_resource(faiss_service: FaissVectorDBService): + resource = faiss_service.load_resource(name="index") + assert isinstance(resource, FaissVectorDBResourceService) + assert resource._name == "index" + + +def test_count(faiss_service: FaissVectorDBService): + collection = "index" + count = faiss_service.count(collection) + assert count == len(faiss_service._local_dir) + + +def test_insert(): + # Test for inserting embeddings (not docs, texts) into docsotre + vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") + test_data = list(iter([("hi", vector)])) + docstore_name = "index" + response = faiss_service.insert(name=docstore_name, data=test_data) + assert response == {"status": "success"} + + +def test_delete(): + # specify name of docstore and ID to delete + docstore_name = "index" + delete_id = "a" + response_delete = faiss_service.delete(name=docstore_name, expr=delete_id) + assert response_delete == {"status": "success"} + + +async def test_similarity_search(): + index_to_id = create_store.index_to_docstore_id + in_mem_docstore = InMemoryDocstore({ + index_to_id[0]: Document(page_content="for"), + index_to_id[1]: Document(page_content="the"), + index_to_id[2]: Document(page_content="test"), + }) + + assert create_store.docstore.__dict__ == in_mem_docstore.__dict__ + + query_vec = await embeddings.aembed_query(text="for") + output = await create_store.asimilarity_search_by_vector(query_vec, k=1) + + assert output == [Document(page_content="for")] + + +def test_has_store_object(): + # create FAISS docstore to test with + object_store = FAISS.from_texts(texts, embeddings, ids=ids) + object_name = "store_object_index" + object_store.save_local(tmp_dir_path, object_name) + + # attempt to load docstore with given index name + load_attempt = faiss_service.has_store_object(object_name) + assert load_attempt is True + + # attempt to load docstore with wrong index name + object_name = "wrong_index_name" + load_attempt = faiss_service.has_store_object(object_name) + assert load_attempt is False + + +def test_create(): + # Test creating docstore from embeddings + vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") + test_embedding = list(iter([("hi", vector)])) + docstore_name = "index" + embeddings_docstore = faiss_service.create(name=docstore_name, text_embeddings=test_embedding) + + # save created docstore + index_name_embeddings = "embeddings_index" + embeddings_docstore.save_local(tmp_dir_path, index_name_embeddings) + + # attempt to load created docstore + load_attempt = faiss_service.has_store_object(index_name_embeddings) + + assert load_attempt is True + + # Test creating docstore from texts + test_texts = ["for", "the", "test"] + texts_docstore = faiss_service.create(name=docstore_name, texts=test_texts) + + # save created docstore + index_name_texts = "texts_index" + texts_docstore.save_local(tmp_dir_path, index_name_texts) + + # attempt to load created docstore + load_attempt = faiss_service.has_store_object(index_name_texts) + + assert load_attempt is True + + # Test creating docstore from documents + test_documents = [Document(page_content="This is for the test.")] + docs_docstore = faiss_service.create(name=docstore_name, documents=test_documents) + + # save created docstore + index_name_docs = "docs_index" + docs_docstore.save_local(tmp_dir_path, index_name_docs) + + # attempt to load created docstore + load_attempt = 
faiss_service.has_store_object(index_name_docs) + + assert load_attempt is True From 1f1db6cacbd3102940e60f6ed7f5ddfadac23bac Mon Sep 17 00:00:00 2001 From: Ana Caklovic Date: Wed, 22 May 2024 19:55:09 +0000 Subject: [PATCH 02/12] faiss fixes --- .../llm/services/nvfoundation_llm_service.py | 27 +++++- morpheus/service/vdb/faiss_vdb_service.py | 83 ++++--------------- tests/_utils/faiss.py | 28 +++++++ tests/conftest.py | 20 ++--- .../services/test_nvfoundation_llm_client.py | 14 ++-- tests/test_faiss_vector_db_service.py | 66 ++++++++------- 6 files changed, 119 insertions(+), 119 deletions(-) create mode 100644 tests/_utils/faiss.py diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index d808c6f43d..63b8a10643 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -14,6 +14,7 @@ import logging import os +import typing from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.llm_service import LLMService @@ -22,7 +23,8 @@ IMPORT_EXCEPTION = None IMPORT_ERROR_MESSAGE = ( - "The `langchain-nvidia-ai-endpoints` package was not found. Install it and other additional dependencies by running the following command:\n" + "The `langchain-nvidia-ai-endpoints` package was not found. Install it and other additional dependencies by " + "running the following command:" "`conda env update --solver=libmamba -n morpheus " "--file morpheus/conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") @@ -77,8 +79,6 @@ def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model self._client = ChatNVIDIA(**{**chat_kwargs, **model_kwargs}) # type: ignore def get_input_names(self) -> list[str]: - schema = self._client.get_input_schema() - return [self._prompt_key] def generate(self, **input_dict) -> str: @@ -118,6 +118,8 @@ def generate_batch(self, inputs: dict[str, list], **kwargs) -> list[str]: ---------- inputs : dict Inputs containing prompt data. + **kwargs : dict + Additional keyword arguments for generate batch. """ prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] @@ -127,13 +129,30 @@ def generate_batch(self, inputs: dict[str, list], **kwargs) -> list[str]: return [g[0].text for g in responses.generations] - async def generate_batch_async(self, inputs: dict[str, list], **kwargs) -> list[str]: + @typing.overload + async def generate_batch_async(self, + inputs: dict[str, list], + return_exceptions: typing.Literal[True] = True) -> list[str | BaseException]: + ... + + @typing.overload + async def generate_batch_async(self, + inputs: dict[str, list], + return_exceptions: typing.Literal[False] = False) -> list[str]: + ... + + async def generate_batch_async(self, + inputs: dict[str, list], + return_exceptions=False) -> list[str] | list[str | BaseException]: """ Issue an asynchronous request to generate a list of responses based on a list of prompts. + Parameters ---------- inputs : dict Inputs containing prompt data. + return_exceptions : bool + Whether to return exceptions in the output list or raise them immediately. """ prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] diff --git a/morpheus/service/vdb/faiss_vdb_service.py b/morpheus/service/vdb/faiss_vdb_service.py index 30f6d7d766..a23d254819 100644 --- a/morpheus/service/vdb/faiss_vdb_service.py +++ b/morpheus/service/vdb/faiss_vdb_service.py @@ -13,18 +13,11 @@ # limitations under the License. 
 import asyncio
-import copy
-import json
 import logging
-import threading
 import time
 import typing
-from functools import wraps

-import numpy as np
 import pandas as pd
-from langchain.docstore.document import Document
-from langchain_community.vectorstores import FAISS

 import cudf

@@ -34,7 +27,7 @@
 logger = logging.getLogger(__name__)

 IMPORT_EXCEPTION = None
-IMPORT_ERROR_MESSAGE = "MilvusVectorDBResourceService requires the milvus and pymilvus packages to be installed."
+IMPORT_ERROR_MESSAGE = "FaissVectorDBResourceService requires the faiss and langchain packages to be installed."

 try:
     from langchain.vectorstores.faiss import FAISS
@@ -86,7 +79,6 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any])
         """
         self._index.add_embeddings(data)
         return {"status": "success"}
-        #return list_of_ids

     def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs: dict[str, typing.Any]) -> dict:
         """
@@ -154,7 +146,8 @@ async def similarity_search(self,
                                 k: int = 4,
                                 **kwargs: dict[str, typing.Any]) -> list[list[dict]]:
         """
-        Perform a similarity search within the FAISS docstore (asimilarity_search_by_vector returns docs most similar to embedding vector asynchronously).
+        Perform a similarity search within the FAISS docstore (asimilarity_search_by_vector
+        returns docs most similar to embedding vector asynchronously).

         Parameters
         ----------
@@ -305,7 +298,7 @@ class FaissVectorDBService(VectorDBService):
     _cleanup_interval = 600  # 10mins
     _last_cleanup_time = time.time()

-    def __init__(self, local_dir: str, embeddings, **kwargs: dict[str, typing.Any]):
+    def __init__(self, local_dir: str, embeddings):

         if IMPORT_EXCEPTION is not None:
             raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION
@@ -319,7 +312,8 @@ def load_resource(self, name: str = "index", **kwargs: dict[str, typing.Any]) ->

     def has_store_object(self, name: str) -> bool:
         """
-        Check if specific index file name exists by attempting to load FAISS index, docstore, and index_to_docstore_id from disk with the index file name.
+        Check if specific index file name exists by attempting to load FAISS index, docstore,
+        and index_to_docstore_id from disk with the index file name.

         Parameters
         ----------
@@ -351,13 +345,7 @@ def list_store_objects(self, **kwargs: dict[str, typing.Any]) -> list[str]:
         list[str]
             A list of collection names.
         """
-        return self._client.list_collections(**kwargs)
-
-    def _create_schema_field(self, field_conf: dict) -> "pymilvus.FieldSchema":
-
-        field_schema = pymilvus.FieldSchema.construct_from_dict(field_conf)
-
-        return field_schema
+        raise NotImplementedError("List operation is not supported in FAISS")

     def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing.Any]):
         """
@@ -387,20 +375,19 @@
             documents = kwargs["documents"]
             return resource._index.from_documents(documents, self._embeddings)

-        elif "text_embeddings" in kwargs:
+        if "text_embeddings" in kwargs:
             text_embeddings = kwargs["text_embeddings"]
             metadatas = kwargs.get("metadatas")
             ids = kwargs.get("ids")
             return resource._index.from_embeddings(text_embeddings, self._embeddings, metadatas, ids)

-        elif "texts" in kwargs:
+        if "texts" in kwargs:
             texts = kwargs["texts"]
             metadatas = kwargs.get("metadatas")
             ids = kwargs.get("ids")
             return resource._index.from_texts(texts, self._embeddings, metadatas, ids)

-        else:
-            raise ValueError("You must provide documents, texts, or text_embeddings along with embeddings in kwargs.")
+        raise ValueError("You must provide documents, texts, or text_embeddings along with embeddings in kwargs.")

     def create_from_dataframe(self,
                               name: str,
@@ -422,28 +409,7 @@ def create_from_dataframe(self,
             Extra keyword arguments specific to the vector database implementation.
         """

-        fields = self._build_schema_conf(df=df)
-
-        create_kwargs = {
-            "schema_conf": {
-                "description": "Auto generated schema from DataFrame in Morpheus",
-                "schema_fields": fields,
-            }
-        }
-
-        if (kwargs.get("index_field", None) is not None):
-            # Check to make sure the column name exists in the fields
-            create_kwargs["index_conf"] = {
-                "field_name": kwargs.get("index_field"),  # Default index type
-                "metric_type": "L2",
-                "index_type": "HNSW",
-                "params": {
-                    "M": 8,
-                    "efConstruction": 64,
-                },
-            }
-
-        self.create(name=name, overwrite=overwrite, **create_kwargs)
+        raise NotImplementedError("Create from dataframe operation is not supported in FAISS")

     def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str,
                                                                               typing.Any]) -> dict[str, typing.Any]:
@@ -703,28 +669,7 @@ def drop(self, name: str, **kwargs: dict[str, typing.Any]) -> None:
             If mandatory arguments are missing or if the provided 'collection' value is invalid.
         """

-        logger.debug("Dropping collection: %s, kwargs=%s", name, kwargs)
-
-        if self.has_store_object(name):
-            resource = kwargs.get("resource", "collection")
-            if resource == "collection":
-                self._client.drop_collection(collection_name=name)
-            elif resource == "partition":
-                if "partition_name" not in kwargs:
-                    raise ValueError("Mandatory argument 'partition_name' is required when resource='partition'")
-                partition_name = kwargs["partition_name"]
-                if self._client.has_partition(collection_name=name, partition_name=partition_name):
-                    # Collection need to be released before dropping the partition.
-                    self._client.release_collection(collection_name=name)
-                    self._client.drop_partition(collection_name=name, partition_name=partition_name)
-            elif resource == "index":
-                if "field_name" in kwargs and "index_name" in kwargs:
-                    self._client.drop_index(collection_name=name,
-                                            field_name=kwargs["field_name"],
-                                            index_name=kwargs["index_name"])
-                else:
-                    raise ValueError(
-                        "Mandatory arguments 'field_name' and 'index_name' are required when resource='index'")
+        raise NotImplementedError("Drop operation is not supported in FAISS")

     def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict:
         """
@@ -757,7 +702,7 @@ def release_resource(self, name: str) -> None:
             Name of the collection to release.
         """

-        self._client.release_collection(collection_name=name)
+        raise NotImplementedError("Release operation is not supported in FAISS")

     def close(self) -> None:
         """

        This method disconnects from the Milvus vector database by removing the connection.
""" - self._client.close() + raise NotImplementedError("Describe operation is not supported in FAISS") diff --git a/tests/_utils/faiss.py b/tests/_utils/faiss.py new file mode 100644 index 0000000000..6e8d1d3bbf --- /dev/null +++ b/tests/_utils/faiss.py @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities for testing Morpheus with FAISS""" +from typing import List + + +class FakeEmbedder: + + def embed_documents(self, data: list) -> List[List[float]]: + return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] + + def embed_query(self, data: str) -> List[float]: + return [float(1.0)] * 1023 + [float(0.0)] + + async def aembed_query(self, data: str) -> List[float]: + return [float(1.0)] * 1023 + [float(0.0)] diff --git a/tests/conftest.py b/tests/conftest.py index 2fe3f923e8..b4ac6ca95c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1011,22 +1011,22 @@ def milvus_server_uri(tmp_path_factory): yield uri +from _utils.faiss import FakeEmbedder + + @pytest.fixture(scope="session") def faiss_test_dir(): - # Get oath for FAISS directory + # Get path for FAISS directory tmp_dir_path = os.environ.get('FAISS_DIR') if tmp_dir_path is None: raise ValueError("set FAISS_DIR to directory with FAISS DB") - - # Can change embedding model - embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") - tmp_dir = FAISS.load_local(tmp_dir_path, embeddings=embeddings, allow_dangerous_deserialization=True) - yield tmp_dir + yield tmp_dir_path @pytest.fixture(scope="session") -def test_embeddings(): - embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") +def faiss_test_embeddings(): + #embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") + embeddings = FakeEmbedder() yield embeddings @@ -1065,8 +1065,8 @@ def nvfoundationllm_fixture(fail_missing: bool): Fixture to ensure nvfoundationllm is installed """ skip_reason = ( - "Tests for NVFoundation require the langchain-nvidia-ai-endpoints package to be installed, to install this run:\n" - "`conda env update --solver=libmamba -n morpheus " + "Tests for NVFoundation require the langchain-nvidia-ai-endpoints package to be installed, to install this " + "run:\n `conda env update --solver=libmamba -n morpheus " "--file conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") yield import_or_skip("langchain_nvidia_ai_endpoints", reason=skip_reason, fail_missing=fail_missing) diff --git a/tests/llm/services/test_nvfoundation_llm_client.py b/tests/llm/services/test_nvfoundation_llm_client.py index 72218d2880..dc02c1836a 100644 --- a/tests/llm/services/test_nvfoundation_llm_client.py +++ b/tests/llm/services/test_nvfoundation_llm_client.py @@ -17,12 +17,10 @@ from unittest import mock import pytest -from langchain_core.messages import BaseMessage from langchain_core.messages import ChatMessage from langchain_core.outputs import ChatGeneration from langchain_core.outputs import LLMResult -from 
morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMClient from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMService @@ -30,7 +28,7 @@ @pytest.mark.usefixtures("restore_environ") @pytest.mark.parametrize("api_key", [None, "test_api_key"]) @pytest.mark.parametrize("set_env", [True, False]) -def test_constructor(mock_nvfoundationllm: mock.MagicMock, api_key: str, set_env: bool): +def test_constructor(api_key: str, set_env: bool): """ Test that the constructor prefers explicit arguments over environment variables. """ @@ -71,7 +69,7 @@ def test_get_input_names(): def test_generate(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.generate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ ChatGeneration(message=ChatMessage(content=x.text, role="assistant")) for x in kwargs["prompts"] ]]) @@ -86,7 +84,7 @@ def test_generate_batch(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.generate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] for x in kwargs["prompts"]]) @@ -101,7 +99,7 @@ async def test_generate_async(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] for x in kwargs["prompts"]]) @@ -116,7 +114,7 @@ async def test_generate_batch_async(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] for x in kwargs["prompts"]]) @@ -130,7 +128,7 @@ def mock_generation_side_effect(*args, **kwargs): async def test_generate_batch_async_error(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: - def mock_generation_side_effect(*args, **kwargs): + def mock_generation_side_effect(*_, **kwargs): return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] for x in kwargs["prompts"]]) diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py index 9217d5f7f8..f140ecfec0 100644 --- a/tests/test_faiss_vector_db_service.py +++ b/tests/test_faiss_vector_db_service.py @@ -14,62 +14,72 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from pathlib import Path -from typing import Union - -import numpy as np import pytest from langchain_community.docstore.in_memory import InMemoryDocstore from langchain_community.vectorstores import FAISS from langchain_core.documents import Document from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings -import cudf - +from _utils.faiss import FakeEmbedder from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBResourceService from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBService # create FAISS docstore for testing texts = ["for", "the", "test"] -embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") +embeddings = FakeEmbedder() ids = ["a", "b", "c"] create_store = FAISS.from_texts(texts, embeddings, ids=ids) -index_name = "index" -tmp_dir_path = "/workspace/.tmp/faiss_test_index" -create_store.save_local(tmp_dir_path, index_name) -faiss_service = FaissVectorDBService(local_dir=tmp_dir_path, embeddings=embeddings) +INDEX_NAME = "index" +TMP_DIR_PATH = "/workspace/.tmp/faiss_test_index" +create_store.save_local(TMP_DIR_PATH, INDEX_NAME) + + +def test_dir_path(): + import os + from _utils.faiss import FakeEmbedder -@pytest.fixture(scope="module", name="faiss_service") -def faiss_service_fixture(faiss_test_dir: str, test_embeddings: list): + tmp_dir_path = os.environ.get('FAISS_DIR') + if tmp_dir_path is None: + raise ValueError("set FAISS_DIR to directory with FAISS DB") + + # Can change embedding model + embeddings = FakeEmbedder() + tmp_dir = FAISS.load_local(tmp_dir_path, embeddings=embeddings, allow_dangerous_deserialization=True) + return tmp_dir + + +# scope = function +@pytest.fixture(scope="function", name="faiss_service") +def faiss_service_fixture(faiss_test_dir: str, faiss_test_embeddings: list): # Fixture for FAISS service; can edit FAISS docstore instantiated outside fixture if need to change # embedding model, et. 
- service = FaissVectorDBService(local_dir=faiss_test_dir, embeddings=test_embeddings) + service = FaissVectorDBService(local_dir=faiss_test_dir, embeddings=faiss_test_embeddings) yield service def test_load_resource(faiss_service: FaissVectorDBService): - resource = faiss_service.load_resource(name="index") + resource = faiss_service.load_resource() assert isinstance(resource, FaissVectorDBResourceService) assert resource._name == "index" def test_count(faiss_service: FaissVectorDBService): - collection = "index" - count = faiss_service.count(collection) + docstore = "index" + count = faiss_service.count(docstore) assert count == len(faiss_service._local_dir) -def test_insert(): - # Test for inserting embeddings (not docs, texts) into docsotre - vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") +def test_insert(faiss_service: FaissVectorDBService): + # Test for inserting embeddings (not docs, texts) into docstore + vector = FakeEmbedder().embed_query(data="hi") test_data = list(iter([("hi", vector)])) docstore_name = "index" response = faiss_service.insert(name=docstore_name, data=test_data) assert response == {"status": "success"} -def test_delete(): +def test_delete(faiss_service: FaissVectorDBService): # specify name of docstore and ID to delete docstore_name = "index" delete_id = "a" @@ -87,17 +97,17 @@ async def test_similarity_search(): assert create_store.docstore.__dict__ == in_mem_docstore.__dict__ - query_vec = await embeddings.aembed_query(text="for") + query_vec = await embeddings.aembed_query("for") output = await create_store.asimilarity_search_by_vector(query_vec, k=1) assert output == [Document(page_content="for")] -def test_has_store_object(): +def test_has_store_object(faiss_service: FaissVectorDBService): # create FAISS docstore to test with object_store = FAISS.from_texts(texts, embeddings, ids=ids) object_name = "store_object_index" - object_store.save_local(tmp_dir_path, object_name) + object_store.save_local(TMP_DIR_PATH, object_name) # attempt to load docstore with given index name load_attempt = faiss_service.has_store_object(object_name) @@ -109,7 +119,7 @@ def test_has_store_object(): assert load_attempt is False -def test_create(): +def test_create(faiss_service: FaissVectorDBService): # Test creating docstore from embeddings vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") test_embedding = list(iter([("hi", vector)])) @@ -118,7 +128,7 @@ def test_create(): # save created docstore index_name_embeddings = "embeddings_index" - embeddings_docstore.save_local(tmp_dir_path, index_name_embeddings) + embeddings_docstore.save_local(TMP_DIR_PATH, index_name_embeddings) # attempt to load created docstore load_attempt = faiss_service.has_store_object(index_name_embeddings) @@ -131,7 +141,7 @@ def test_create(): # save created docstore index_name_texts = "texts_index" - texts_docstore.save_local(tmp_dir_path, index_name_texts) + texts_docstore.save_local(TMP_DIR_PATH, index_name_texts) # attempt to load created docstore load_attempt = faiss_service.has_store_object(index_name_texts) @@ -144,7 +154,7 @@ def test_create(): # save created docstore index_name_docs = "docs_index" - docs_docstore.save_local(tmp_dir_path, index_name_docs) + docs_docstore.save_local(TMP_DIR_PATH, index_name_docs) # attempt to load created docstore load_attempt = faiss_service.has_store_object(index_name_docs) From 53df8a05558eafcdb486601b3f781b2b0b3ce44b Mon Sep 17 00:00:00 2001 From: Ana Caklovic Date: Fri, 24 May 2024 04:35:44 +0000 Subject: [PATCH 03/12] 
nvfoundation fixes --- dependencies.yaml | 3 ++ .../llm/services/nvfoundation_llm_service.py | 49 +++++++++++++++---- tests/_utils/faiss.py | 15 ++++-- tests/conftest.py | 7 +-- tests/test_faiss_vector_db_service.py | 19 +------ 5 files changed, 56 insertions(+), 37 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 616c1db3de..c8918d45fe 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -275,6 +275,9 @@ dependencies: - databricks-connect - milvus==2.3.5 # update to match pymilvus when available - pymilvus==2.3.6 + - langchain-nvidia-ai-endpoints + - langchain-community + - faiss-gpu test_python_morpheus: common: diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index 63b8a10643..a4ddd99724 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -81,6 +81,10 @@ def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model def get_input_names(self) -> list[str]: return [self._prompt_key] + @property + def model_kwargs(self): + return self._model_kwargs + def generate(self, **input_dict) -> str: """ Issue a request to generate a response based on a given prompt. @@ -111,23 +115,36 @@ async def generate_async(self, **input_dict) -> str: return (await self.generate_batch_async(inputs=inputs, **input_dict))[0] - def generate_batch(self, inputs: dict[str, list], **kwargs) -> list[str]: + def generate_batch(self, + inputs: dict[str, list], + return_exceptions: typing.Literal[True] = True, + **kwargs) -> list[str] | list[str | BaseException]: """ Issue a request to generate a list of responses based on a list of prompts. Parameters ---------- inputs : dict Inputs containing prompt data. + return_exceptions : bool + Whether to return exceptions in the output list or raise them immediately. **kwargs : dict Additional keyword arguments for generate batch. """ - prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] + prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] final_kwargs = {**self._model_kwargs, **kwargs} - responses = self._client.generate_prompt(prompts=prompts, **final_kwargs) # type: ignore + responses = [] + try: + generated_responses = self._client.generate_prompt(prompts=prompts, **final_kwargs) # type: ignore + responses = [g[0].text for g in generated_responses.generations] + except Exception as e: + if return_exceptions: + responses.append(e) + else: + raise e - return [g[0].text for g in responses.generations] + return responses @typing.overload async def generate_batch_async(self, @@ -143,7 +160,8 @@ async def generate_batch_async(self, async def generate_batch_async(self, inputs: dict[str, list], - return_exceptions=False) -> list[str] | list[str | BaseException]: + return_exceptions=False, + **kwargs) -> list[str] | list[str | BaseException]: """ Issue an asynchronous request to generate a list of responses based on a list of prompts. @@ -153,15 +171,28 @@ async def generate_batch_async(self, Inputs containing prompt data. return_exceptions : bool Whether to return exceptions in the output list or raise them immediately. + **kwargs: dict + Additional keyword arguments for generate batch async. 
""" - prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] + # prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] - final_kwargs = {**self._model_kwargs, **kwargs} + # final_kwargs = {**self._model_kwargs, **kwargs} + + # responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore - responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore + # return [g[0].text for g in responses.generations] + + prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] + final_kwargs = {**self._model_kwargs, **kwargs} - return [g[0].text for g in responses.generations] + try: + responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore + return [g[0].text for g in responses.generations] + except Exception as e: + if return_exceptions: + return [e] + raise e class NVFoundationLLMService(LLMService): diff --git a/tests/_utils/faiss.py b/tests/_utils/faiss.py index 6e8d1d3bbf..d8060477dc 100644 --- a/tests/_utils/faiss.py +++ b/tests/_utils/faiss.py @@ -18,11 +18,18 @@ class FakeEmbedder: + def embed_query(self, data: str) -> List[float]: + # setting data to arbitrary float since constant value will always be returned + data = 0.0 + return [float(1.0)] * 1023 + [float(0.0) * data] + def embed_documents(self, data: list) -> List[List[float]]: return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] - def embed_query(self, data: str) -> List[float]: - return [float(1.0)] * 1023 + [float(0.0)] - async def aembed_query(self, data: str) -> List[float]: - return [float(1.0)] * 1023 + [float(0.0)] + # setting data to arbitrary float since constant value will always be returned + data = 0.0 + return [float(1.0)] * 1023 + [float(0.0) * data] + + async def aembed_documents(self, data: list) -> List[List[float]]: + return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] diff --git a/tests/conftest.py b/tests/conftest.py index b4ac6ca95c..4de60687df 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,10 +30,9 @@ import pytest import requests -from langchain_community.vectorstores import FAISS # added -from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings # added from _utils import import_or_skip +from _utils.faiss import FakeEmbedder from _utils.kafka import _init_pytest_kafka from _utils.kafka import kafka_bootstrap_servers_fixture # noqa: F401 pylint:disable=unused-import from _utils.kafka import kafka_consumer_fixture # noqa: F401 pylint:disable=unused-import @@ -1011,9 +1010,6 @@ def milvus_server_uri(tmp_path_factory): yield uri -from _utils.faiss import FakeEmbedder - - @pytest.fixture(scope="session") def faiss_test_dir(): # Get path for FAISS directory @@ -1025,7 +1021,6 @@ def faiss_test_dir(): @pytest.fixture(scope="session") def faiss_test_embeddings(): - #embeddings = NVIDIAEmbeddings(model="nvolveqa_40k") embeddings = FakeEmbedder() yield embeddings diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py index f140ecfec0..4f229cc1de 100644 --- a/tests/test_faiss_vector_db_service.py +++ b/tests/test_faiss_vector_db_service.py @@ -18,7 +18,6 @@ from langchain_community.docstore.in_memory import InMemoryDocstore from langchain_community.vectorstores import FAISS from langchain_core.documents import Document -from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings from _utils.faiss import FakeEmbedder from morpheus.service.vdb.faiss_vdb_service import 
FaissVectorDBResourceService @@ -34,22 +33,6 @@ create_store.save_local(TMP_DIR_PATH, INDEX_NAME) -def test_dir_path(): - import os - - from _utils.faiss import FakeEmbedder - - tmp_dir_path = os.environ.get('FAISS_DIR') - if tmp_dir_path is None: - raise ValueError("set FAISS_DIR to directory with FAISS DB") - - # Can change embedding model - embeddings = FakeEmbedder() - tmp_dir = FAISS.load_local(tmp_dir_path, embeddings=embeddings, allow_dangerous_deserialization=True) - return tmp_dir - - -# scope = function @pytest.fixture(scope="function", name="faiss_service") def faiss_service_fixture(faiss_test_dir: str, faiss_test_embeddings: list): # Fixture for FAISS service; can edit FAISS docstore instantiated outside fixture if need to change @@ -121,7 +104,7 @@ def test_has_store_object(faiss_service: FaissVectorDBService): def test_create(faiss_service: FaissVectorDBService): # Test creating docstore from embeddings - vector = NVIDIAEmbeddings(model="nvolveqa_40k").embed_query("hi") + vector = FakeEmbedder().embed_query(data="hi") test_embedding = list(iter([("hi", vector)])) docstore_name = "index" embeddings_docstore = faiss_service.create(name=docstore_name, text_embeddings=test_embedding) From b7ef6546cc54c6cea0e8f53de2bc2cb509658910 Mon Sep 17 00:00:00 2001 From: Ana Caklovic Date: Fri, 24 May 2024 16:13:49 +0000 Subject: [PATCH 04/12] final fixes --- .../src/stages/inference_client_stage.cpp | 1 + .../llm/services/nvfoundation_llm_service.py | 22 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/morpheus/_lib/src/stages/inference_client_stage.cpp b/morpheus/_lib/src/stages/inference_client_stage.cpp index 069ccd557e..1366d5691c 100644 --- a/morpheus/_lib/src/stages/inference_client_stage.cpp +++ b/morpheus/_lib/src/stages/inference_client_stage.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index a4ddd99724..d151961c19 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -160,7 +160,7 @@ async def generate_batch_async(self, async def generate_batch_async(self, inputs: dict[str, list], - return_exceptions=False, + return_exceptions: typing.Literal[True] = True, **kwargs) -> list[str] | list[str | BaseException]: """ Issue an asynchronous request to generate a list of responses based on a list of prompts. @@ -175,24 +175,20 @@ async def generate_batch_async(self, Additional keyword arguments for generate batch async. 
""" - # prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] - - # final_kwargs = {**self._model_kwargs, **kwargs} - - # responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore - - # return [g[0].text for g in responses.generations] - prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] final_kwargs = {**self._model_kwargs, **kwargs} + responses = [] try: - responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore - return [g[0].text for g in responses.generations] + generated_responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore + responses = [g[0].text for g in generated_responses.generations] except Exception as e: if return_exceptions: - return [e] - raise e + responses.append(e) + else: + raise e + + return responses class NVFoundationLLMService(LLMService): From 889de2b04f03322fb630f3289d3a8249f5eb1696 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Fri, 24 May 2024 14:04:23 -0400 Subject: [PATCH 05/12] Making the API key work the same as base URL --- morpheus/llm/services/nvfoundation_llm_service.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index b38d468ed7..0915975aba 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -214,15 +214,14 @@ def __init__(self, *, api_key: str = None, base_url: str = None, **model_kwargs) super().__init__() - # Set the base url from the environment if not provided. Default to None to allow the client to set the url. if base_url is None: - self._base_url = os.getenv('NVIDIA_API_BASE', "https://api.nvcf.nvidia.com/v2/nvcf") + self._base_url = os.getenv('NVIDIA_API_BASE', None) else: self._base_url = base_url - if "NVIDIA_API_KEY" in os.environ: - self._api_key = os.getenv('NVIDIA_API_KEY') + if api_key is None: + self._api_key = os.getenv('NVIDIA_API_KEY', None) else: self._api_key = api_key From 5d634a8629c53713815add21a0498753f6a9166f Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Thu, 6 Jun 2024 11:12:19 -0700 Subject: [PATCH 06/12] Updating all LLM services to be the same --- morpheus/llm/services/nemo_llm_service.py | 48 ++++---- .../llm/services/nvfoundation_llm_service.py | 48 ++++---- morpheus/llm/services/openai_chat_service.py | 110 +++++++++--------- tests/test_faiss_vector_db_service.py | 2 +- 4 files changed, 107 insertions(+), 101 deletions(-) diff --git a/morpheus/llm/services/nemo_llm_service.py b/morpheus/llm/services/nemo_llm_service.py index d744948159..364b3928bc 100644 --- a/morpheus/llm/services/nemo_llm_service.py +++ b/morpheus/llm/services/nemo_llm_service.py @@ -188,6 +188,22 @@ async def generate_batch_async(self, class NeMoLLMService(LLMService): """ A service for interacting with NeMo LLM models, this class should be used to create a client for a specific model. + + Parameters + ---------- + api_key : str, optional + The API key for the LLM service, by default None. If `None` the API key will be read from the `NGC_API_KEY` + environment variable. If neither are present an error will be raised., by default None + org_id : str, optional + The organization ID for the LLM service, by default None. If `None` the organization ID will be read from + the `NGC_ORG_ID` environment variable. 
This value is only required if the account associated with the + `api_key` is a member of multiple NGC organizations, by default None + base_url : str, optional + The api host url, by default None. If `None` the url will be read from the `NGC_BASE_URL` environment + variable. If neither are present the NeMo default will be used, by default None + retry_count : int, optional + The number of times to retry a request before raising an exception, by default 5 + """ class APIKey(EnvConfigValue): @@ -198,7 +214,7 @@ class OrgId(EnvConfigValue): _ENV_KEY: str = "NGC_ORG_ID" _ALLOW_NONE: bool = True - class BaseURI(EnvConfigValue): + class BaseURL(EnvConfigValue): _ENV_KEY: str = "NGC_API_BASE" _ALLOW_NONE: bool = True @@ -206,26 +222,11 @@ def __init__(self, *, api_key: APIKey | str = None, org_id: OrgId | str = None, - base_uri: BaseURI | str = None, + base_url: BaseURL | str = None, retry_count=5) -> None: """ Creates a service for interacting with NeMo LLM models. - Parameters - ---------- - api_key : str, optional - The API key for the LLM service, by default None. If `None` the API key will be read from the `NGC_API_KEY` - environment variable. If neither are present an error will be raised., by default None - org_id : str, optional - The organization ID for the LLM service, by default None. If `None` the organization ID will be read from - the `NGC_ORG_ID` environment variable. This value is only required if the account associated with the - `api_key` is a member of multiple NGC organizations., by default None - base_uri : str, optional - The base URI for the LLM service, by default None. If `None` the base URI will be read from - the `NGC_API_BASE` environment variable. This value is only required if the account associated with the - `api_key` is a member of multiple NGC organizations., by default None - retry_count : int, optional - The number of times to retry a request before raising an exception, by default 5 """ @@ -240,22 +241,25 @@ def __init__(self, if not isinstance(org_id, NeMoLLMService.OrgId): org_id = NeMoLLMService.OrgId(org_id) - if not isinstance(base_uri, NeMoLLMService.BaseURI): - base_uri = NeMoLLMService.BaseURI(base_uri) + if not isinstance(base_url, NeMoLLMService.BaseURL): + base_url = NeMoLLMService.BaseURL(base_url) + self._api_key = api_key + self._org_id = org_id + self._base_url = base_url self._retry_count = retry_count self._conn = nemollm.NemoLLM( - api_host=base_uri.value, + api_host=self._base_url.value, # The client must configure the authentication and authorization parameters # in accordance with the API server security policy. # Configure Bearer authorization - api_key=api_key.value, + api_key=self._api_key.value, # If you are in more than one LLM-enabled organization, you must # specify your org ID in the form of a header. This is optional # if you are only in one LLM-enabled org. - org_id=org_id.value, + org_id=self._org_id.value, ) def get_client(self, *, model_name: str, **model_kwargs) -> NeMoLLMClient: diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index 0915975aba..9a30daf3f8 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -13,11 +13,11 @@ # limitations under the License. 
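# A minimal sketch of the EnvConfigValue helper that the nested APIKey/OrgId/
# BaseURL classes above derive from. The real implementation lives in
# morpheus/utils/env_config_value.py and is not shown in this patch; the
# behavior below (an explicit value wins, otherwise the _ENV_KEY environment
# variable is consulted, and None is only accepted when _ALLOW_NONE is set) is
# an assumption inferred from the call sites, not the actual code.
import os


class EnvConfigValue:

    _ENV_KEY: str = None
    _ALLOW_NONE: bool = False

    def __init__(self, value: str = None):

        # An explicitly supplied value takes precedence over the environment
        if value is None and self._ENV_KEY is not None:
            value = os.environ.get(self._ENV_KEY, None)

        if value is None and not self._ALLOW_NONE:
            raise ValueError(f"A value must be supplied, or the {self._ENV_KEY} environment variable must be set")

        self._value = value

    @property
    def value(self) -> str:
        return self._value


# Under these semantics, NeMoLLMService(api_key="my-ngc-key") and an unset
# argument with NGC_API_KEY exported resolve to the same configuration, which
# is the intent of the refactor above.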
import logging -import os import typing from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.llm_service import LLMService +from morpheus.utils.env_config_value import EnvConfigValue logger = logging.getLogger(__name__) @@ -64,8 +64,8 @@ def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model chat_kwargs = { "model": model_name, - "api_key": self._parent._api_key, - "base_url": self._parent._base_url, + "api_key": self._parent._api_key.value, + "base_url": self._parent._base_url.value, } # Remove None values set by the environment in the kwargs @@ -160,7 +160,7 @@ async def generate_batch_async(self, async def generate_batch_async(self, inputs: dict[str, list], - return_exceptions: typing.Literal[True] = True, + return_exceptions=True, **kwargs) -> list[str] | list[str | BaseException]: """ Issue an asynchronous request to generate a list of responses based on a list of prompts. @@ -194,37 +194,39 @@ async def generate_batch_async(self, class NVFoundationLLMService(LLMService): """ A service for interacting with NeMo LLM models, this class should be used to create a client for a specific model. + Parameters ---------- api_key : str, optional - The API key for the LLM service, by default None. If `None` the API key will be read from the `NGC_API_KEY` - environment variable. If neither are present an error will be raised. - org_id : str, optional - The organization ID for the LLM service, by default None. If `None` the organization ID will be read from the - `NGC_ORG_ID` environment variable. This value is only required if the account associated with the `api_key` is - a member of multiple NGC organizations. + The API key for the LLM service, by default None. If `None` the API key will be read from the `NVIDIA_API_KEY` + environment variable. If neither are present an error will be raised, by default None base_url : str, optional - The api host url, by default None. If `None` the url will be read from the `NVAI_BASE_URL` environment - variable. If neither are present `https://api.nvcf.nvidia.com/v2/nvcf` will be used by langchain. + The api host url, by default None. If `None` the url will be read from the `NVIDIA_API_BASE` environment + variable. If neither are present the NeMo default will be used, by default None """ - def __init__(self, *, api_key: str = None, base_url: str = None, **model_kwargs) -> None: + class APIKey(EnvConfigValue): + _ENV_KEY: str = "NVIDIA_API_KEY" + _ALLOW_NONE: bool = True + + class BaseURL(EnvConfigValue): + _ENV_KEY: str = "NVIDIA_API_BASE" + _ALLOW_NONE: bool = True + + def __init__(self, *, api_key: APIKey | str = None, base_url: BaseURL | str = None, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() - # Set the base url from the environment if not provided. Default to None to allow the client to set the url. 
- if base_url is None: - self._base_url = os.getenv('NVIDIA_API_BASE', None) - else: - self._base_url = base_url + if not isinstance(api_key, NVFoundationLLMService.APIKey): + api_key = NVFoundationLLMService.APIKey(api_key) - if api_key is None: - self._api_key = os.getenv('NVIDIA_API_KEY', None) - else: - self._api_key = api_key + if not isinstance(base_url, NVFoundationLLMService.BaseURL): + base_url = NVFoundationLLMService.BaseURL(base_url) + self._api_key = api_key + self._base_url = base_url self._default_model_kwargs = model_kwargs def _merge_model_kwargs(self, model_kwargs: dict) -> dict: @@ -232,7 +234,7 @@ def _merge_model_kwargs(self, model_kwargs: dict) -> dict: @property def api_key(self): - return self._api_key + return self._api_key.value def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClient: """ diff --git a/morpheus/llm/services/openai_chat_service.py b/morpheus/llm/services/openai_chat_service.py index 76d45441f0..8fe1919a90 100644 --- a/morpheus/llm/services/openai_chat_service.py +++ b/morpheus/llm/services/openai_chat_service.py @@ -68,20 +68,6 @@ def set_output(self, output: typing.Any) -> None: self.outputs = output -class OpenAIOrgId(EnvConfigValue): - _ENV_KEY: str = "OPENAI_ORG_ID" - _ALLOW_NONE: bool = True - - -class OpenAIAPIKey(EnvConfigValue): - _ENV_KEY: str = "OPENAI_API_KEY" - - -class OpenAIBaseURL(EnvConfigValue): - _ENV_KEY: str = "OPENAI_BASE_URL" - _ALLOW_NONE: bool = True - - class OpenAIChatClient(LLMClient): """ Client for interacting with a specific OpenAI chat model. This class should be constructed with the @@ -89,6 +75,9 @@ class OpenAIChatClient(LLMClient): Parameters ---------- + parent : OpenAIChatService + The parent service for this client. + model_name : str The name of the model to interact with. @@ -115,9 +104,6 @@ def __init__(self, model_name: str, set_assistant: bool = False, max_retries: int = 10, - org_id: str | OpenAIOrgId = None, - api_key: str | OpenAIAPIKey = None, - base_url: str | OpenAIBaseURL = None, json=False, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: @@ -125,15 +111,6 @@ def __init__(self, super().__init__() - if not isinstance(org_id, OpenAIOrgId): - org_id = OpenAIOrgId(org_id) - - if not isinstance(api_key, OpenAIOrgId): - api_key = OpenAIOrgId(api_key) - - if not isinstance(base_url, OpenAIBaseURL): - base_url = OpenAIBaseURL(base_url) - assert parent is not None, "Parent service cannot be None." self._parent = parent @@ -152,13 +129,13 @@ def __init__(self, # Create the client objects for both sync and async self._client = openai.OpenAI(max_retries=max_retries, - organization=org_id.value, - api_key=api_key.value, - base_url=base_url.value) + organization=self._parent._org_id.value, + api_key=self._parent._api_key.value, + base_url=self._parent._base_url.value) self._client_async = openai.AsyncOpenAI(max_retries=max_retries, - organization=org_id.value, - api_key=api_key.value, - base_url=base_url.value) + organization=self._parent._org_id.value, + api_key=self._parent._api_key.value, + base_url=self._parent._base_url.value) def get_input_names(self) -> list[str]: input_names = [self._prompt_key] @@ -358,39 +335,62 @@ async def generate_batch_async(self, class OpenAIChatService(LLMService): """ A service for interacting with OpenAI Chat models, this class should be used to create clients. + + Parameters + ---------- + api_key : str, optional + The API key for the LLM service, by default None. 
If `None` the API key will be read from the + `OPENAI_API_KEY` environment variable. If neither are present an error will be raised. + org_id : str, optional + The organization ID for the LLM service, by default None. If `None` the organization ID will be read from + the `OPENAI_ORG_ID` environment variable. This value is only required if the account associated with the + `api_key` is a member of multiple organizations, by default None + base_url : str, optional + The api host url, by default None. If `None` the url will be read from the `OPENAI_BASE_URL` environment + variable. If neither are present the OpenAI default will be used, by default None + default_model_kwargs : dict, optional + Default arguments to use when creating a client via the `get_client` function. Any argument specified here + will automatically be used when calling `get_client`. Arguments specified in the `get_client` function will + overwrite default values specified here. This is useful to set model arguments before creating multiple + clients. By default None + """ - def __init__(self, *, api_key: str = None, base_url: str = None, default_model_kwargs: dict = None) -> None: - """ - Creates a service for interacting with OpenAI Chat models, this class should be used to create clients. + class APIKey(EnvConfigValue): + _ENV_KEY: str = "OPENAI_API_KEY" + + class OrgId(EnvConfigValue): + _ENV_KEY: str = "OPENAI_ORG_ID" + _ALLOW_NONE: bool = True + + class BaseURL(EnvConfigValue): + _ENV_KEY: str = "OPENAI_BASE_URL" + _ALLOW_NONE: bool = True + + def __init__(self, + *, + api_key: APIKey | str = None, + org_id: OrgId | str = None, + base_url: BaseURL | str = None, + default_model_kwargs: dict = None) -> None: - Parameters - ---------- - api_key : str, optional - The API key for the LLM service, by default None. If `None` the API key will be read from the - `OPENAI_API_KEY` environment variable. If neither are present an error will be raised. - base_url : str, optional - The api host url, by default None. If `None` the url will be read from the `OPENAI_BASE_URL` environment - variable. If neither are present the OpenAI default will be used., by default None - default_model_kwargs : dict, optional - Default arguments to use when creating a client via the `get_client` function. Any argument specified here - will automatically be used when calling `get_client`. Arguments specified in the `get_client` function will - overwrite default values specified here. This is useful to set model arguments before creating multiple - clients. By default None - - Raises - ------ - ImportError - If the `openai` library is not found in the python environment. 
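An illustrative sketch of the refactored call pattern (the key and model name
below are placeholders): credentials are resolved once on the service, and every
client created from it inherits them:

    service = OpenAIChatService(api_key="sk-example",  # or omit and export OPENAI_API_KEY
                                default_model_kwargs={"temperature": 0.0})

    # Clients no longer accept api_key/org_id/base_url directly; they read them
    # from the parent service.
    client = service.get_client(model_name="gpt-4o-mini")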
- """ if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() + if not isinstance(api_key, OpenAIChatService.APIKey): + api_key = OpenAIChatService.APIKey(api_key) + + if not isinstance(org_id, OpenAIChatService.OrgId): + org_id = OpenAIChatService.OrgId(org_id) + + if not isinstance(base_url, OpenAIChatService.BaseURL): + base_url = OpenAIChatService.BaseURL(base_url) + self._api_key = api_key + self._org_id = org_id self._base_url = base_url - self._default_model_kwargs = default_model_kwargs or {} self._logger = logging.getLogger(f"{__package__}.{OpenAIChatService.__name__}") diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py index 4f229cc1de..fe0f898eb7 100644 --- a/tests/test_faiss_vector_db_service.py +++ b/tests/test_faiss_vector_db_service.py @@ -16,7 +16,7 @@ import pytest from langchain_community.docstore.in_memory import InMemoryDocstore -from langchain_community.vectorstores import FAISS +from langchain_community.vectorstores.faiss import FAISS from langchain_core.documents import Document from _utils.faiss import FakeEmbedder From ceeafb10474f7ee167f60521882405ff3d7a1723 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 11 Jun 2024 09:08:41 -0700 Subject: [PATCH 07/12] Style cleanup --- conda/environments/all_cuda-121_arch-x86_64.yaml | 3 +++ conda/environments/dev_cuda-121_arch-x86_64.yaml | 3 +++ conda/environments/examples_cuda-121_arch-x86_64.yaml | 3 +++ conda/environments/runtime_cuda-121_arch-x86_64.yaml | 3 +++ dependencies.yaml | 6 +++--- morpheus/llm/nodes/langchain_agent_node.py | 9 +++++++-- morpheus/messages/message_base.py | 3 +-- 7 files changed, 23 insertions(+), 7 deletions(-) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index 3b310995fb..d99bafc3f5 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -122,7 +122,10 @@ dependencies: - databricks-connect - dgl==2.0.0 - dglgo + - faiss-gpu==1.7 - google-search-results==2.4 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.9 - milvus==2.3.5 - nemollm diff --git a/conda/environments/dev_cuda-121_arch-x86_64.yaml b/conda/environments/dev_cuda-121_arch-x86_64.yaml index 23ff2c707e..075de86f16 100644 --- a/conda/environments/dev_cuda-121_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-121_arch-x86_64.yaml @@ -98,6 +98,9 @@ dependencies: - PyMuPDF==1.23.21 - databricks-cli < 0.100 - databricks-connect + - faiss-gpu==1.7 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index 11d5e535ce..5247c10145 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -66,7 +66,10 @@ dependencies: - databricks-connect - dgl==2.0.0 - dglgo + - faiss-gpu==1.7 - google-search-results==2.4 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.9 - milvus==2.3.5 - nemollm diff --git a/conda/environments/runtime_cuda-121_arch-x86_64.yaml b/conda/environments/runtime_cuda-121_arch-x86_64.yaml index 80f6f995d2..1bd5996236 100644 --- a/conda/environments/runtime_cuda-121_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-121_arch-x86_64.yaml @@ -40,6 +40,9 @@ dependencies: - 
pip: - databricks-cli < 0.100 - databricks-connect + - faiss-gpu==1.7 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 - pymilvus==2.3.6 name: runtime_cuda-121_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 55b917ffcb..b94b521e77 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -274,11 +274,11 @@ dependencies: - pip: - databricks-cli < 0.100 - databricks-connect + - faiss-gpu==1.7 + - langchain-community + - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 # update to match pymilvus when available - pymilvus==2.3.6 - - langchain-nvidia-ai-endpoints - - langchain-community - - faiss-gpu test_python_morpheus: common: diff --git a/morpheus/llm/nodes/langchain_agent_node.py b/morpheus/llm/nodes/langchain_agent_node.py index 8ab772c07e..f3b76208b1 100644 --- a/morpheus/llm/nodes/langchain_agent_node.py +++ b/morpheus/llm/nodes/langchain_agent_node.py @@ -111,8 +111,13 @@ async def execute(self, context: LLMContext) -> LLMContext: # pylint: disable=i # If the agent encounters a parsing error or a server error after retries, replace the error # with a default value to prevent the pipeline from crashing results[i][j] = self._replace_exceptions_value - logger.warning(f"Exception encountered in result[{i}][{j}]: {answer}. " - f"Replacing with default message: \"{self._replace_exceptions_value}\".") + logger.warning( + "Exception encountered in result[%d][%d]: %s. " + "Replacing with default message: '%s'.", + i, + j, + answer, + self._replace_exceptions_value) context.set_output(results) diff --git a/morpheus/messages/message_base.py b/morpheus/messages/message_base.py index 3e8a19385f..8edcc751de 100644 --- a/morpheus/messages/message_base.py +++ b/morpheus/messages/message_base.py @@ -21,7 +21,6 @@ from morpheus import messages from morpheus.config import CppConfig -from morpheus.messages import ControlMessage from morpheus.utils import logger as morpheus_logger @@ -51,7 +50,7 @@ def _internal_new(other_cls, *args, **kwargs): # Instantiating MultiMessage and its subclasses from Python or C++ will generate a deprecation warning if issubtype(other_cls, messages.MultiMessage): - morpheus_logger.deprecated_message_warning(other_cls, ControlMessage) + morpheus_logger.deprecated_message_warning(other_cls, messages.ControlMessage) # If _cpp_class is set, and use_cpp is enabled, create the C++ instance if (getattr(other_cls, "_cpp_class", None) is not None and CppConfig.get_should_use_cpp()): From 0500fbf842d340ef6c527926f989f43b47f5877f Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 11 Jun 2024 09:10:44 -0700 Subject: [PATCH 08/12] Removing separate dependency group for all of the LLM examples. 
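Regarding the `langchain_agent_node.py` hunk in the previous patch: handing the
arguments to `logger.warning` instead of pre-building an f-string defers
interpolation until the record is actually emitted, which is also what pylint's
logging checks expect. A minimal, self-contained illustration:

    import logging

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)

    # The f-string is formatted even though WARNING is filtered out here...
    logger.warning(f"Exception encountered in result[{0}][{1}]: {'error'}")

    # ...while %-style arguments are only interpolated if the record is emitted.
    logger.warning("Exception encountered in result[%d][%d]: %s", 0, 1, "error")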
--- .../all_cuda-121_arch-x86_64.yaml | 13 ++- .../dev_cuda-121_arch-x86_64.yaml | 5 -- .../examples_cuda-121_arch-x86_64.yaml | 13 ++- .../runtime_cuda-121_arch-x86_64.yaml | 5 -- dependencies.yaml | 80 +++++-------------- .../utils/langchain_llm_client_wrapper.py | 17 +--- 6 files changed, 36 insertions(+), 97 deletions(-) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index d99bafc3f5..267efb662f 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -13,7 +13,7 @@ dependencies: - appdirs - arxiv=1.4 - automake -- beautifulsoup4 +- beautifulsoup4=4.12 - benchmark=1.8.3 - boost-cpp=1.84 - boto3 @@ -69,7 +69,7 @@ dependencies: - numexpr - numpydoc=1.5 - nvtabular=23.08.00 -- onnx +- onnx=1.15 - openai=1.13 - papermill=2.4.0 - pip @@ -95,11 +95,11 @@ dependencies: - rdma-core>=48 - requests - requests-cache=1.1 -- requests-toolbelt +- requests-toolbelt=1.0 - s3fs=2023.12.2 - scikit-build=0.17.6 - scikit-learn=1.3.2 -- sentence-transformers +- sentence-transformers=2.7 - sphinx - sphinx_rtd_theme - sqlalchemy @@ -124,11 +124,10 @@ dependencies: - dglgo - faiss-gpu==1.7 - google-search-results==2.4 - - langchain-community - langchain-nvidia-ai-endpoints==0.0.11 - - langchain==0.1.9 + - langchain==0.1.16 - milvus==2.3.5 - - nemollm + - nemollm==0.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 name: all_cuda-121_arch-x86_64 diff --git a/conda/environments/dev_cuda-121_arch-x86_64.yaml b/conda/environments/dev_cuda-121_arch-x86_64.yaml index 075de86f16..55ada60795 100644 --- a/conda/environments/dev_cuda-121_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-121_arch-x86_64.yaml @@ -11,7 +11,6 @@ channels: dependencies: - appdirs - automake -- beautifulsoup4 - benchmark=1.8.3 - boost-cpp=1.84 - breathe=4.35.0 @@ -78,7 +77,6 @@ dependencies: - rdma-core>=48 - requests - requests-cache=1.1 -- requests-toolbelt - scikit-build=0.17.6 - scikit-learn=1.3.2 - sphinx @@ -98,9 +96,6 @@ dependencies: - PyMuPDF==1.23.21 - databricks-cli < 0.100 - databricks-connect - - faiss-gpu==1.7 - - langchain-community - - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index 5247c10145..a56c41c20b 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -12,7 +12,7 @@ dependencies: - anyio>=3.7 - appdirs - arxiv=1.4 -- beautifulsoup4 +- beautifulsoup4=4.12 - boto3 - click >=8 - cuml=24.02.* @@ -35,7 +35,7 @@ dependencies: - numexpr - numpydoc=1.5 - nvtabular=23.08.00 -- onnx +- onnx=1.15 - openai=1.13 - papermill=2.4.0 - pip @@ -48,10 +48,10 @@ dependencies: - pytorch=*=*cuda* - requests - requests-cache=1.1 -- requests-toolbelt +- requests-toolbelt=1.0 - s3fs=2023.12.2 - scikit-learn=1.3.2 -- sentence-transformers +- sentence-transformers=2.7 - sqlalchemy - tqdm=4 - transformers=4.36.2 @@ -68,10 +68,9 @@ dependencies: - dglgo - faiss-gpu==1.7 - google-search-results==2.4 - - langchain-community - langchain-nvidia-ai-endpoints==0.0.11 - - langchain==0.1.9 + - langchain==0.1.16 - milvus==2.3.5 - - nemollm + - nemollm==0.3.5 - pymilvus==2.3.6 name: examples_cuda-121_arch-x86_64 diff --git a/conda/environments/runtime_cuda-121_arch-x86_64.yaml b/conda/environments/runtime_cuda-121_arch-x86_64.yaml index 1bd5996236..e6b76b43aa 100644 --- 
a/conda/environments/runtime_cuda-121_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-121_arch-x86_64.yaml @@ -10,7 +10,6 @@ channels: - pytorch dependencies: - appdirs -- beautifulsoup4 - click >=8 - datacompy=0.10 - dill=0.3.7 @@ -30,7 +29,6 @@ dependencies: - pytorch=*=*cuda* - requests - requests-cache=1.1 -- requests-toolbelt - scikit-learn=1.3.2 - sqlalchemy - tqdm=4 @@ -40,9 +38,6 @@ dependencies: - pip: - databricks-cli < 0.100 - databricks-connect - - faiss-gpu==1.7 - - langchain-community - - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 - pymilvus==2.3.6 name: runtime_cuda-121_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index b94b521e77..9011d0b974 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -32,10 +32,7 @@ files: - docs - example-dfp-prod - example-gnn - - example-llm-agents - - example-llm-completion - - example-llm-rag - - example-llm-vdb-upload + - example-llms - python - runtime - test_python_morpheus @@ -86,10 +83,7 @@ files: - development - example-dfp-prod - example-gnn - - example-llm-agents - - example-llm-completion - - example-llm-rag - - example-llm-vdb-upload + - example-llms - python - runtime - test_python_morpheus @@ -107,10 +101,7 @@ files: - docs - example-dfp-prod - example-gnn - - example-llm-agents - - example-llm-completion - - example-llm-rag - - example-llm-vdb-upload + - example-llms - python - runtime @@ -132,10 +123,7 @@ files: - cve-mitigation - example-dfp-prod - example-gnn - - example-llm-agents - - example-llm-completion - - example-llm-rag - - example-llm-vdb-upload + - example-llms - python - runtime @@ -249,7 +237,6 @@ dependencies: - &dill dill=0.3.7 - &scikit-learn scikit-learn=1.3.2 - appdirs - - beautifulsoup4 - datacompy=0.10 - elasticsearch==8.9.0 - feedparser=6.0.10 @@ -264,7 +251,6 @@ dependencies: - pytorch=*=*cuda* - requests - requests-cache=1.1 - - requests-toolbelt # Transitive dep needed by nemollm, specified here to ensure we get a compatible version - sqlalchemy - tqdm=4 - typing_utils=0.1 @@ -274,9 +260,6 @@ dependencies: - pip: - databricks-cli < 0.100 - databricks-connect - - faiss-gpu==1.7 - - langchain-community - - langchain-nvidia-ai-endpoints==0.0.11 - milvus==2.3.5 # update to match pymilvus when available - pymilvus==2.3.6 @@ -321,55 +304,32 @@ dependencies: - dgl==2.0.0 - dglgo - example-llm-agents: + example-llms: common: - output_types: [conda] packages: - - &grpcio-status grpcio-status==1.59 - &transformers transformers=4.36.2 # newer versions are incompatible with our pinned version of huggingface_hub - - huggingface_hub=0.20.2 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762 - - numexpr - - sentence-transformers - - pip - - pip: - - &langchain langchain==0.1.9 - - nemollm - - example-llm-completion: - common: - - output_types: [conda] - packages: - - *grpcio-status - - &arxiv arxiv=1.4 - - &newspaper3k newspaper3k=0.2 - - &pypdf pypdf=3.17.4 - - example-llm-rag: - common: - - output_types: [conda] - packages: - - *grpcio-status - anyio>=3.7 + - arxiv=1.4 + - beautifulsoup4=4.12 + - grpcio-status==1.59 + - huggingface_hub=0.20.2 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762 - jsonpatch>=1.33 + - newspaper3k=0.2 + - numexpr + - onnx=1.15 - openai=1.13 + - pypdf=3.17.4 + - requests-toolbelt=1.0 # Transitive dep needed by nemollm, specified here to ensure we get a compatible version + - sentence-transformers=2.7 - pip - pip: - - *langchain + - faiss-gpu==1.7 - google-search-results==2.4 - - 
example-llm-vdb-upload: - common: - - output_types: [conda] - packages: - - *arxiv - - *grpcio-status - - *newspaper3k - - *pypdf - - onnx - - pip - - pip: - - PyMuPDF==1.23.21 - - *langchain + - langchain-nvidia-ai-endpoints==0.0.11 + - langchain==0.1.16 + - nemollm==0.3.5 + - PyMuPDF==1.23.21 model-training-tuning: common: diff --git a/morpheus/llm/services/utils/langchain_llm_client_wrapper.py b/morpheus/llm/services/utils/langchain_llm_client_wrapper.py index 1215ab35b8..80a30925ac 100644 --- a/morpheus/llm/services/utils/langchain_llm_client_wrapper.py +++ b/morpheus/llm/services/utils/langchain_llm_client_wrapper.py @@ -14,20 +14,11 @@ import typing -from morpheus.llm.services.llm_service import LLMClient +from langchain_core.callbacks import AsyncCallbackManagerForLLMRun +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.language_models.llms import LLM -IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = ("LangchainLLMClientWrapper require the langchain package to be installed. " - "Install it by running the following command:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file morpheus/conda/environments/examples_cuda-121_arch-x86_64.yaml --prune`") - -try: - from langchain_core.callbacks import AsyncCallbackManagerForLLMRun - from langchain_core.callbacks import CallbackManagerForLLMRun - from langchain_core.language_models.llms import LLM -except ImportError as import_exc: - IMPORT_EXCEPTION = import_exc +from morpheus.llm.services.llm_service import LLMClient class LangchainLLMClientWrapper(LLM): From fa40b2e764e909f528ed88961fc8619a6172b06e Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 11 Jun 2024 16:12:12 -0700 Subject: [PATCH 09/12] Final cleanup --- morpheus.code-workspace | 4 +- .../llm/services/nvfoundation_llm_service.py | 12 +- morpheus/service/vdb/faiss_vdb_service.py | 315 +++++++----------- tests/_utils/faiss.py | 35 -- tests/conftest.py | 16 - tests/test_faiss_vector_db_service.py | 176 +++++----- 6 files changed, 214 insertions(+), 344 deletions(-) delete mode 100644 tests/_utils/faiss.py diff --git a/morpheus.code-workspace b/morpheus.code-workspace index 5154095574..f81d904f8e 100644 --- a/morpheus.code-workspace +++ b/morpheus.code-workspace @@ -27,7 +27,6 @@ "launch": { "compounds": [], "configurations": [ - { "args": [ "--log_level=DEBUG", @@ -732,7 +731,6 @@ ], "yapf.args": [ "--style=${workspaceFolder}/setup.cfg" - ], - "python.analysis.inlayHints.pytestParameters": true + ] } } diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index 9a30daf3f8..5154158538 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -41,7 +41,7 @@ class NVFoundationLLMClient(LLMClient): `NeMoLLMService.get_client` method. Parameters ---------- - parent : NVFoundationMService + parent : NVFoundationMService The parent service for this client. model_name : str The name of the model to interact with. @@ -127,8 +127,8 @@ def generate_batch(self, Inputs containing prompt data. return_exceptions : bool Whether to return exceptions in the output list or raise them immediately. - **kwargs : dict - Additional keyword arguments for generate batch. + **kwargs + Additional keyword arguments for generate batch. """ prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] @@ -171,8 +171,8 @@ async def generate_batch_async(self, Inputs containing prompt data. 
return_exceptions : bool Whether to return exceptions in the output list or raise them immediately. - **kwargs: dict - Additional keyword arguments for generate batch async. + **kwargs + Additional keyword arguments for generate batch async. """ prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]] @@ -202,7 +202,7 @@ class NVFoundationLLMService(LLMService): environment variable. If neither are present an error will be raised, by default None base_url : str, optional The api host url, by default None. If `None` the url will be read from the `NVIDIA_API_BASE` environment - variable. If neither are present the NeMo default will be used, by default None + variable. If neither are present the NVIDIA default will be used, by default None """ class APIKey(EnvConfigValue): diff --git a/morpheus/service/vdb/faiss_vdb_service.py b/morpheus/service/vdb/faiss_vdb_service.py index a23d254819..7b7d3362bd 100644 --- a/morpheus/service/vdb/faiss_vdb_service.py +++ b/morpheus/service/vdb/faiss_vdb_service.py @@ -27,9 +27,10 @@ logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = "FaissDBResourceService requires the FAISS." +IMPORT_ERROR_MESSAGE = "FaissDBResourceService requires the FAISS library to be installed." try: + from langchain.embeddings.base import Embeddings from langchain.vectorstores.faiss import FAISS except ImportError as import_exc: IMPORT_EXCEPTION = import_exc @@ -37,14 +38,14 @@ class FaissVectorDBResourceService(VectorDBResourceService): """ - Represents a service for managing resources in a Milvus Vector Database. + Represents a service for managing resources in a FAISS Vector Database. Parameters ---------- + parent : FaissVectorDBService + The parent service for this resource. name : str - Name of the resource. - client : MilvusClient - An instance of the MilvusClient for interaction with the Milvus Vector Database. + The name of the resource. """ def __init__(self, parent: "FaissVectorDBService", *, name: str) -> None: @@ -54,14 +55,15 @@ def __init__(self, parent: "FaissVectorDBService", *, name: str) -> None: super().__init__() self._parent = parent - self._name = name + self._folder_path = self._parent._local_dir + self._index_name = name self._index = FAISS.load_local(folder_path=self._parent._local_dir, embeddings=self._parent._embeddings, - index_name=self._name, + index_name=self._index_name, allow_dangerous_deserialization=True) - def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) -> dict: + def insert(self, data: list[list] | list[dict], **kwargs) -> dict: """ Insert data into the vector database. @@ -69,7 +71,7 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) ---------- data : list[list] | list[dict] Data to be inserted into the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -77,10 +79,9 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) dict Returns response content as a dictionary. """ - self._index.add_embeddings(data) - return {"status": "success"} + raise NotImplementedError("Insert operation is not supported in FAISS") - def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs: dict[str, typing.Any]) -> dict: + def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs) -> dict: """ Insert a dataframe entires into the vector database. 
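Taken together, the hunks above and below make the FAISS-backed resource service
read-only: `insert`, `insert_dataframe`, `update`, `delete` and `delete_by_keys`
now raise `NotImplementedError`, while `describe`, `count` and the async
`similarity_search` operate on the index loaded in `__init__`. An illustrative
sketch of the supported surface (the folder path and index name are
placeholders, and `my_embeddings` stands in for any langchain `Embeddings`
implementation matching the dimensionality of the stored index):

    import asyncio

    async def main():
        service = FaissVectorDBService(local_dir="/tmp/faiss_store", embeddings=my_embeddings)
        resource = service.load_resource("index")

        print(resource.describe())  # {'index_name': 'index', 'folder_path': '/tmp/faiss_store'}
        print(resource.count())     # number of vectors in the loaded index

        hits = await resource.similarity_search(embeddings=[[0.0] * 1024], k=2)
        print(hits)                 # one result list per query embedding

    asyncio.run(main())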
@@ -88,7 +89,7 @@ def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwa ---------- df : typing.Union[cudf.DataFrame, pd.DataFrame] Dataframe to be inserted into the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -98,13 +99,13 @@ def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwa """ raise NotImplementedError("Insert operation is not supported in FAISS") - def describe(self, **kwargs: dict[str, typing.Any]) -> dict: + def describe(self, **kwargs) -> dict: """ Provides a description of the collection. Parameters ---------- - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -112,42 +113,32 @@ def describe(self, **kwargs: dict[str, typing.Any]) -> dict: dict Returns response content as a dictionary. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + return { + "index_name": self._index_name, + "folder_path": self._folder_path, + } - def query(self, query: str, **kwargs: dict[str, typing.Any]) -> typing.Any: + def query(self, query: str, **kwargs) -> typing.Any: """ - Query data in a collection in the Milvus vector database. - - This method performs a search operation in the specified collection/partition in the Milvus vector database. + Query data in a collection in the vector database. Parameters ---------- query : str, optional The search query, which can be a filter expression, by default None. - **kwargs : dict + **kwargs Additional keyword arguments for the search operation. Returns ------- typing.Any The search result, which can vary depending on the query and options. - - Raises - ------ - RuntimeError - If an error occurs during the search operation. - If query argument is `None` and `data` keyword argument doesn't exist. - If `data` keyword arguement is `None`. """ raise NotImplementedError("Query operation is not supported in FAISS") - async def similarity_search(self, - embeddings: list[list[float]], - k: int = 4, - **kwargs: dict[str, typing.Any]) -> list[list[dict]]: + async def similarity_search(self, embeddings: list[list[float]], k: int = 4, **kwargs) -> list[list[dict]]: """ - Perform a similarity search within the FAISS docstore (asimilarity_search_by_vector - returns docs most similar to embedding vector asynchronously). + Perform a similarity search within the FAISS docstore. Parameters ---------- @@ -155,7 +146,7 @@ async def similarity_search(self, Embeddings for which to perform the similarity search. k : int, optional The number of nearest neighbors to return, by default 4. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -171,7 +162,7 @@ async def single_search(single_embedding): return list(await asyncio.gather(*[single_search(embedding) for embedding in embeddings])) - def update(self, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def update(self, data: list[typing.Any], **kwargs) -> dict[str, typing.Any]: """ Update data in the collection. @@ -179,7 +170,7 @@ def update(self, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dic ---------- data : list[typing.Any] Data to be updated in the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to upsert operation. 
Returns @@ -189,7 +180,7 @@ def update(self, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dic """ raise NotImplementedError("Update operation is not supported in FAISS") - def delete_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> typing.Any: + def delete_by_keys(self, keys: int | str | list, **kwargs) -> typing.Any: """ Delete vectors by keys from the collection. @@ -197,7 +188,7 @@ def delete_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any] ---------- keys : int | str | list Primary keys to delete vectors. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -207,7 +198,7 @@ def delete_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any] """ raise NotImplementedError("Delete by keys operation is not supported in FAISS") - def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def delete(self, expr: str, **kwargs) -> dict[str, typing.Any]: """ Delete vectors by giving a list of IDs. @@ -215,7 +206,7 @@ def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing ---------- expr : str Delete expression. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -223,10 +214,9 @@ def delete(self, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing dict[str, typing.Any] Returns result of the given keys that are deleted from the collection. """ - self._index.delete(expr) - return {"status": "success"} + raise NotImplementedError("delete operation is not supported in FAISS") - def retrieve_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> list[typing.Any]: + def retrieve_by_keys(self, keys: int | str | list, **kwargs) -> list[typing.Any]: """ Retrieve the inserted vectors using their primary keys. @@ -235,7 +225,7 @@ def retrieve_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.An keys : int | str | list Primary keys to get vectors for. Depending on pk_field type it can be int or str or a list of either. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments for the retrieval operation. Returns @@ -245,13 +235,13 @@ def retrieve_by_keys(self, keys: int | str | list, **kwargs: dict[str, typing.An """ raise NotImplementedError("Retrieve by keys operation is not supported in FAISS") - def count(self, **kwargs: dict[str, typing.Any]) -> int: + def count(self, **kwargs) -> int: """ Returns number of rows/entities. Parameters ---------- - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments for the count operation. Returns @@ -259,19 +249,17 @@ def count(self, **kwargs: dict[str, typing.Any]) -> int: int Returns number of entities in the collection. """ - docstore = self._parent._local_dir - count = len(docstore) - return count + return self._index.index.ntotal - def drop(self, **kwargs: dict[str, typing.Any]) -> None: + def drop(self, **kwargs) -> None: """ - Drop a collection, index, or partition in the Milvus vector database. + Drops the resource from the vector database service. This function allows you to drop a collection. Parameters ---------- - **kwargs : dict + **kwargs Additional keyword arguments for specifying the type and partition name (if applicable). 
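        Raises
        ------
        NotImplementedError
            Dropping is not supported by the FAISS implementation.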
""" raise NotImplementedError("Drop operation is not supported in FAISS") @@ -279,26 +267,22 @@ def drop(self, **kwargs: dict[str, typing.Any]) -> None: class FaissVectorDBService(VectorDBService): """ - Service class for Milvus Vector Database implementation. This class provides functions for interacting - with a Milvus vector database. + Service class for FAISS Vector Database implementation. This class provides functions for interacting + with a FAISS vector database. Parameters ---------- - host : str - The hostname or IP address of the Milvus server. - port : str - The port number for connecting to the Milvus server. - alias : str, optional - Alias for the Milvus connection, by default "default". - **kwargs : dict - Additional keyword arguments specific to the Milvus connection configuration. + local_dir : str + The local directory where the FAISS index files are stored. + embeddings : Embeddings + The embeddings object to use for embedding text. """ _collection_locks = {} _cleanup_interval = 600 # 10mins _last_cleanup_time = time.time() - def __init__(self, local_dir: str, embeddings): + def __init__(self, local_dir: str, embeddings: "Embeddings"): if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION @@ -306,7 +290,26 @@ def __init__(self, local_dir: str, embeddings): self._local_dir = local_dir self._embeddings = embeddings - def load_resource(self, name: str = "index", **kwargs: dict[str, typing.Any]) -> FaissVectorDBResourceService: + @property + def embeddings(self): + return self._embeddings + + def load_resource(self, name: str = "index", **kwargs) -> FaissVectorDBResourceService: + """ + Loads a VDB resource into memory for use. + + Parameters + ---------- + name : str, optional + The VDB resource to load. For FAISS, this corresponds to the index name, by default "index" + **kwargs + Additional keyword arguments specific to the resource service. + + Returns + ------- + FaissVectorDBResourceService + The loaded resource service. + """ return FaissVectorDBResourceService(self, name=name, **kwargs) @@ -331,27 +334,23 @@ def has_store_object(self, name: str) -> bool: index_name=name, allow_dangerous_deserialization=True) return True - except Exception as e: - print(f"Failed to load FAISS with the given index file name: {e}") - # Return False if given index file name cannot be loaded - return False + except Exception: + return False - def list_store_objects(self, **kwargs: dict[str, typing.Any]) -> list[str]: + def list_store_objects(self, **kwargs) -> list[str]: """ - List the names of all collections in the Milvus vector database. + List the names of all resources in the vector database. Returns ------- list[str] A list of collection names. """ - raise NotImplementedError("Drop operation is not supported in FAISS") + raise NotImplementedError("list_store_objects operation is not supported in FAISS") - def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing.Any]): + def create(self, name: str, overwrite: bool = False, **kwargs): """ - Create a collection in the Milvus vector database with the specified name and configuration. This method - creates a new collection in the Milvus vector database with the provided name and configuration options. - If the collection already exists, it can be overwritten if the `overwrite` parameter is set to True. + Create a collection. Parameters ---------- @@ -359,7 +358,7 @@ def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing. 
Name of the collection to be created. overwrite : bool, optional If True, the collection will be overwritten if it already exists, by default False. - **kwargs : dict + **kwargs Additional keyword arguments containing collection configuration. Raises @@ -367,33 +366,13 @@ def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing. ValueError If the provided schema fields configuration is empty. """ - # can create with: from_embeddings, from_texts, or from_documents - - resource = self.load_resource(name) - - if "documents" in kwargs: - documents = kwargs["documents"] - return resource._index.from_documents(documents, self._embeddings) - - if "text_embeddings" in kwargs: - text_embeddings = kwargs["text_embeddings"] - metadatas = kwargs.get("metadatas") - ids = kwargs.get("ids") - return resource._index.from_embeddings(text_embeddings, self._embeddings, metadatas, ids) - - if "texts" in kwargs: - texts = kwargs["texts"] - metadatas = kwargs.get("metadatas") - ids = kwargs.get("ids") - return resource._index.from_texts(texts, self._embeddings, metadatas, ids) - - raise ValueError("You must provide documents, texts, or text_embeddings along with embeddings in kwargs.") + raise NotImplementedError("create operation is not supported in FAISS") def create_from_dataframe(self, name: str, df: typing.Union[cudf.DataFrame, pd.DataFrame], overwrite: bool = False, - **kwargs: dict[str, typing.Any]) -> None: + **kwargs) -> None: """ Create collections in the vector database. @@ -405,16 +384,15 @@ def create_from_dataframe(self, The dataframe to create the collection from. overwrite : bool, optional Whether to overwrite the collection if it already exists. Default is False. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + raise NotImplementedError("create_from_dataframe operation is not supported in FAISS") - def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, - typing.Any]) -> dict[str, typing.Any]: + def insert(self, name: str, data: list[list] | list[dict], **kwargs) -> dict[str, typing.Any]: """ - Insert a collection specific data in the Milvus vector database. + Insert a collection specific data in the vector database. Parameters ---------- @@ -422,7 +400,7 @@ def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, Name of the collection to be inserted. data : list[list] | list[dict] Data to be inserted in the collection. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments containing collection configuration. Returns @@ -436,16 +414,12 @@ def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, If the collection not exists exists. """ - resource = self.load_resource(name) - - return resource.insert(data, **kwargs) + raise NotImplementedError("create_from_dataframe operation is not supported in FAISS") - def insert_dataframe(self, - name: str, - df: typing.Union[cudf.DataFrame, pd.DataFrame], - **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def insert_dataframe(self, name: str, df: typing.Union[cudf.DataFrame, pd.DataFrame], + **kwargs) -> dict[str, typing.Any]: """ - Converts dataframe to rows and insert to a collection in the Milvus vector database. + Converts dataframe to rows and insert to the vector database. Parameters ---------- @@ -453,7 +427,7 @@ def insert_dataframe(self, Name of the collection to be inserted. 
df : typing.Union[cudf.DataFrame, pd.DataFrame] Dataframe to be inserted in the collection. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments containing collection configuration. Returns @@ -466,15 +440,11 @@ def insert_dataframe(self, RuntimeError If the collection not exists exists. """ - resource = self.load_resource(name) - - return resource.insert_dataframe(df=df, **kwargs) + raise NotImplementedError("insert_dataframe operation is not supported in FAISS") - def query(self, name: str, query: str = None, **kwargs: dict[str, typing.Any]) -> typing.Any: + def query(self, name: str, query: str = None, **kwargs) -> typing.Any: """ - Query data in a collection in the Milvus vector database. - - This method performs a search operation in the specified collection/partition in the Milvus vector database. + Query data in a vector database. Parameters ---------- @@ -482,7 +452,7 @@ def query(self, name: str, query: str = None, **kwargs: dict[str, typing.Any]) - Name of the collection to search within. query : str The search query, which can be a filter expression. - **kwargs : dict + **kwargs Additional keyword arguments for the search operation. Returns @@ -491,11 +461,9 @@ def query(self, name: str, query: str = None, **kwargs: dict[str, typing.Any]) - The search result, which can vary depending on the query and options. """ - resource = self.load_resource(name) - - return resource.query(query, **kwargs) + raise NotImplementedError("query operation is not supported in FAISS") - async def similarity_search(self, name: str, **kwargs: dict[str, typing.Any]) -> list[dict]: + async def similarity_search(self, name: str, **kwargs) -> list[dict]: """ Perform a similarity search within the collection. @@ -503,7 +471,7 @@ async def similarity_search(self, name: str, **kwargs: dict[str, typing.Any]) -> ---------- name : str Name of the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -512,11 +480,9 @@ async def similarity_search(self, name: str, **kwargs: dict[str, typing.Any]) -> Returns a list of dictionaries representing the results of the similarity search. """ - resource = self.load_resource(name) + raise NotImplementedError("similarity_search operation is not supported in FAISS") - return resource.similarity_search(**kwargs) - - def update(self, name: str, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def update(self, name: str, data: list[typing.Any], **kwargs) -> dict[str, typing.Any]: """ Update data in the vector database. @@ -526,7 +492,7 @@ def update(self, name: str, data: list[typing.Any], **kwargs: dict[str, typing.A Name of the collection. data : list[typing.Any] Data to be updated in the collection. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to upsert operation. Returns @@ -535,14 +501,9 @@ def update(self, name: str, data: list[typing.Any], **kwargs: dict[str, typing.A Returns result of the updated operation stats. 
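        Raises
        ------
        NotImplementedError
            `update` is not supported by the FAISS implementation.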
""" - if not isinstance(data, list): - raise RuntimeError("Data is not of type list.") - - resource = self.load_resource(name) - - return resource.update(data=data, **kwargs) + raise NotImplementedError("update operation is not supported in FAISS") - def delete_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> typing.Any: + def delete_by_keys(self, name: str, keys: int | str | list, **kwargs) -> typing.Any: """ Delete vectors by keys from the collection. @@ -552,7 +513,7 @@ def delete_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, Name of the collection. keys : int | str | list Primary keys to delete vectors. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -561,11 +522,9 @@ def delete_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, Returns result of the given keys that are delete from the collection. """ - resource = self.load_resource(name) + raise NotImplementedError("delete_by_keys operation is not supported in FAISS") - return resource.delete_by_keys(keys=keys, **kwargs) - - def delete(self, name: str, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def delete(self, name: str, expr: str, **kwargs) -> dict[str, typing.Any]: """ Delete vectors from the collection using expressions. @@ -575,7 +534,7 @@ def delete(self, name: str, expr: str, **kwargs: dict[str, typing.Any]) -> dict[ Name of the collection. expr : str Delete expression. - **kwargs : dict[str, typing.Any] + **kwargs Extra keyword arguments specific to the vector database implementation. Returns @@ -584,12 +543,9 @@ def delete(self, name: str, expr: str, **kwargs: dict[str, typing.Any]) -> dict[ Returns result of the given keys that are delete from the collection. """ - resource = self.load_resource(name) - result = resource.delete(expr=expr, **kwargs) - - return result + raise NotImplementedError("delete operation is not supported in FAISS") - def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> list[typing.Any]: + def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs) -> list[typing.Any]: """ Retrieve the inserted vectors using their primary keys from the Collection. @@ -600,7 +556,7 @@ def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str keys : int | str | list Primary keys to get vectors for. Depending on pk_field type it can be int or str or a list of either. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments for the retrieval operation. Returns @@ -609,13 +565,9 @@ def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str Returns result rows of the given keys from the collection. """ - resource = self.load_resource(name) + raise NotImplementedError("retrieve_by_keys operation is not supported in FAISS") - result = resource.retrieve_by_keys(keys=keys, **kwargs) - - return result - - def count(self, name: str, **kwargs: dict[str, typing.Any]) -> int: + def count(self, name: str, **kwargs) -> int: """ Returns number of rows/entities in the given collection. @@ -623,7 +575,7 @@ def count(self, name: str, **kwargs: dict[str, typing.Any]) -> int: ---------- name : str Name of the collection. - **kwargs : dict[str, typing.Any] + **kwargs Additional keyword arguments for the count operation. 
Returns @@ -631,47 +583,29 @@ def count(self, name: str, **kwargs: dict[str, typing.Any]) -> int: int Returns number of entities in the collection. """ - resource = self.load_resource(name) - return resource.count(**kwargs) + raise NotImplementedError("count operation is not supported in FAISS") - def drop(self, name: str, **kwargs: dict[str, typing.Any]) -> None: + def drop(self, name: str, **kwargs) -> None: """ - Drop a collection, index, or partition in the Milvus vector database. - - This method allows you to drop a collection, an index within a collection, - or a specific partition within a collection in the Milvus vector database. + Drop a collection. Parameters ---------- name : str Name of the collection, index, or partition to be dropped. - **kwargs : dict + **kwargs Additional keyword arguments for specifying the type and partition name (if applicable). - Notes on Expected Keyword Arguments: - ------------------------------------ - - 'collection' (str, optional): - Specifies the type of collection to drop. Possible values: 'collection' (default), 'index', 'partition'. - - - 'partition_name' (str, optional): - Required when dropping a specific partition within a collection. Specifies the partition name to be dropped. - - - 'field_name' (str, optional): - Required when dropping an index within a collection. Specifies the field name for which the index is created. - - - 'index_name' (str, optional): - Required when dropping an index within a collection. Specifies the name of the index to be dropped. - Raises ------ ValueError If mandatory arguments are missing or if the provided 'collection' value is invalid. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + raise NotImplementedError("drop operation is not supported in FAISS") - def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict: + def describe(self, name: str, **kwargs) -> dict: """ Describe the collection in the vector database. @@ -679,8 +613,8 @@ def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict: ---------- name : str Name of the collection. - **kwargs : dict[str, typing.Any] - Additional keyword arguments specific to the Milvus vector database. + **kwargs + Additional keyword arguments specific to the vector database. Returns ------- @@ -688,9 +622,7 @@ def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict: Returns collection information. """ - resource = self.load_resource(name) - - return resource.describe(**kwargs) + raise NotImplementedError("describe operation is not supported in FAISS") def release_resource(self, name: str) -> None: """ @@ -702,13 +634,10 @@ def release_resource(self, name: str) -> None: Name of the collection to release. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + raise NotImplementedError("release_resource operation is not supported in FAISS") def close(self) -> None: """ - Close the connection to the Milvus vector database. - - This method disconnects from the Milvus vector database by removing the connection. - + Close the vector database service and release all resources. """ - raise NotImplementedError("Describe operation is not supported in FAISS") + raise NotImplementedError("close operation is not supported in FAISS") diff --git a/tests/_utils/faiss.py b/tests/_utils/faiss.py deleted file mode 100644 index d8060477dc..0000000000 --- a/tests/_utils/faiss.py +++ /dev/null @@ -1,35 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utilities for testing Morpheus with FAISS""" -from typing import List - - -class FakeEmbedder: - - def embed_query(self, data: str) -> List[float]: - # setting data to arbitrary float since constant value will always be returned - data = 0.0 - return [float(1.0)] * 1023 + [float(0.0) * data] - - def embed_documents(self, data: list) -> List[List[float]]: - return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] - - async def aembed_query(self, data: str) -> List[float]: - # setting data to arbitrary float since constant value will always be returned - data = 0.0 - return [float(1.0)] * 1023 + [float(0.0) * data] - - async def aembed_documents(self, data: list) -> List[List[float]]: - return [[float(3.1)] * 1023 + [float(i)] for i in range(len(data))] diff --git a/tests/conftest.py b/tests/conftest.py index 075011589b..ee5181d3bc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,7 +32,6 @@ import requests from _utils import import_or_skip -from _utils.faiss import FakeEmbedder from _utils.kafka import _init_pytest_kafka from _utils.kafka import kafka_bootstrap_servers_fixture # noqa: F401 pylint:disable=unused-import from _utils.kafka import kafka_consumer_fixture # noqa: F401 pylint:disable=unused-import @@ -1021,21 +1020,6 @@ def milvus_server_uri(tmp_path_factory): yield uri -@pytest.fixture(scope="session") -def faiss_test_dir(): - # Get path for FAISS directory - tmp_dir_path = os.environ.get('FAISS_DIR') - if tmp_dir_path is None: - raise ValueError("set FAISS_DIR to directory with FAISS DB") - yield tmp_dir_path - - -@pytest.fixture(scope="session") -def faiss_test_embeddings(): - embeddings = FakeEmbedder() - yield embeddings - - @pytest.fixture(scope="session", name="milvus_data") def milvus_data_fixture(): inital_data = [{"id": i, "embedding": [i / 10.0] * 3, "age": 25 + i} for i in range(10)] diff --git a/tests/test_faiss_vector_db_service.py b/tests/test_faiss_vector_db_service.py index fe0f898eb7..98a428bbe3 100644 --- a/tests/test_faiss_vector_db_service.py +++ b/tests/test_faiss_vector_db_service.py @@ -14,132 +14,126 @@ # See the License for the specific language governing permissions and # limitations under the License. 
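The rewritten test module below replaces the deleted `_utils/faiss.py` helper
with a self-contained `FakeEmbedder` whose output is a deterministic one-hot
vector keyed on text length. A quick illustrative check of that property (using
the `FakeEmbedder` class defined just below):

    vec = FakeEmbedder().embed_query("abc")

    assert len(vec) == 1024
    assert vec[3] == 1.0    # index = len("abc") % 1024
    assert sum(vec) == 1.0  # every other position stays 0.0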
+import os
+import typing
+from pathlib import Path
+
 import pytest
-from langchain_community.docstore.in_memory import InMemoryDocstore
-from langchain_community.vectorstores.faiss import FAISS
-from langchain_core.documents import Document
 
-from _utils.faiss import FakeEmbedder
 from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBResourceService
 from morpheus.service.vdb.faiss_vdb_service import FaissVectorDBService
 
-# create FAISS docstore for testing
-texts = ["for", "the", "test"]
-embeddings = FakeEmbedder()
-ids = ["a", "b", "c"]
-create_store = FAISS.from_texts(texts, embeddings, ids=ids)
-INDEX_NAME = "index"
-TMP_DIR_PATH = "/workspace/.tmp/faiss_test_index"
-create_store.save_local(TMP_DIR_PATH, INDEX_NAME)
+if (typing.TYPE_CHECKING):
+    from langchain_core.embeddings import Embeddings
+else:
+    lc_core_embeddings = pytest.importorskip("langchain_core.embeddings", reason="langchain_core not installed")
+    Embeddings = lc_core_embeddings.Embeddings
+
+
+class FakeEmbedder(Embeddings):
+
+    def embed_query(self, text: str) -> list[float]:
+        # One-hot encoding using length of text
+        vec = [float(0.0)] * 1024
+
+        vec[len(text) % 1024] = 1.0
+
+        return vec
+
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        return [self.embed_query(text) for text in texts]
+
+    async def aembed_query(self, text: str) -> list[float]:
+        return self.embed_query(text)
+
+    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
+        return self.embed_documents(texts)
+
+
+@pytest.fixture(scope="function", name="faiss_simple_store_dir")
+def faiss_simple_store_dir_fixture(tmpdir_path: Path):
+
+    from langchain_community.vectorstores.faiss import FAISS
+
+    embeddings = FakeEmbedder()
+
+    # create FAISS docstore for testing
+    index_store = FAISS.from_texts([str(x) * x for x in range(3)], embeddings, ids=[chr(x + 97) for x in range(3)])
+
+    index_store.save_local(str(tmpdir_path), index_name="index")
+
+    # create a second index for testing
+    other_store = FAISS.from_texts([str(x) * x for x in range(3, 8)],
+                                   embeddings,
+                                   ids=[chr(x + 97) for x in range(3, 8)])
+    other_store.save_local(str(tmpdir_path), index_name="other_index")
+
+    return str(tmpdir_path)
 
 
 @pytest.fixture(scope="function", name="faiss_service")
-def faiss_service_fixture(faiss_test_dir: str, faiss_test_embeddings: list):
-    # Fixture for FAISS service; can edit FAISS docstore instantiated outside fixture if need to change
-    # embedding model, et.
+def faiss_service_fixture(faiss_simple_store_dir: str):
+    # Fixture for the FAISS service; edit the `faiss_simple_store_dir` fixture above if the
+    # embedding model, index contents, etc. need to change.
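+    #
+    # With the one-hot FakeEmbedder, the "index" store holds "", "1" and "22"
+    # (basis vectors e_0, e_1 and e_2), so the similarity-search assertions in
+    # the tests below are exact matches rather than fuzzy nearest neighbours.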
- service = FaissVectorDBService(local_dir=faiss_test_dir, embeddings=faiss_test_embeddings) + service = FaissVectorDBService(local_dir=faiss_simple_store_dir, embeddings=FakeEmbedder()) yield service def test_load_resource(faiss_service: FaissVectorDBService): + + # Check the default implementation resource = faiss_service.load_resource() assert isinstance(resource, FaissVectorDBResourceService) - assert resource._name == "index" - - -def test_count(faiss_service: FaissVectorDBService): - docstore = "index" - count = faiss_service.count(docstore) - assert count == len(faiss_service._local_dir) + # Check specifying a name + resource = faiss_service.load_resource("index") + assert resource.describe()["index_name"] == "index" -def test_insert(faiss_service: FaissVectorDBService): - # Test for inserting embeddings (not docs, texts) into docstore - vector = FakeEmbedder().embed_query(data="hi") - test_data = list(iter([("hi", vector)])) - docstore_name = "index" - response = faiss_service.insert(name=docstore_name, data=test_data) - assert response == {"status": "success"} + # Check another name + resource = faiss_service.load_resource("other_index") + assert resource.describe()["index_name"] == "other_index" -def test_delete(faiss_service: FaissVectorDBService): - # specify name of docstore and ID to delete - docstore_name = "index" - delete_id = "a" - response_delete = faiss_service.delete(name=docstore_name, expr=delete_id) - assert response_delete == {"status": "success"} +def test_describe(faiss_service: FaissVectorDBService): + desc_dict = faiss_service.load_resource().describe() + assert desc_dict["index_name"] == "index" + assert os.path.exists(desc_dict["folder_path"]) + # Room for other properties -async def test_similarity_search(): - index_to_id = create_store.index_to_docstore_id - in_mem_docstore = InMemoryDocstore({ - index_to_id[0]: Document(page_content="for"), - index_to_id[1]: Document(page_content="the"), - index_to_id[2]: Document(page_content="test"), - }) - assert create_store.docstore.__dict__ == in_mem_docstore.__dict__ - - query_vec = await embeddings.aembed_query("for") - output = await create_store.asimilarity_search_by_vector(query_vec, k=1) - - assert output == [Document(page_content="for")] - - -def test_has_store_object(faiss_service: FaissVectorDBService): - # create FAISS docstore to test with - object_store = FAISS.from_texts(texts, embeddings, ids=ids) - object_name = "store_object_index" - object_store.save_local(TMP_DIR_PATH, object_name) +def test_count(faiss_service: FaissVectorDBService): - # attempt to load docstore with given index name - load_attempt = faiss_service.has_store_object(object_name) - assert load_attempt is True + count = faiss_service.load_resource().count() + assert count == 3 - # attempt to load docstore with wrong index name - object_name = "wrong_index_name" - load_attempt = faiss_service.has_store_object(object_name) - assert load_attempt is False +async def test_similarity_search(faiss_service: FaissVectorDBService): -def test_create(faiss_service: FaissVectorDBService): - # Test creating docstore from embeddings - vector = FakeEmbedder().embed_query(data="hi") - test_embedding = list(iter([("hi", vector)])) - docstore_name = "index" - embeddings_docstore = faiss_service.create(name=docstore_name, text_embeddings=test_embedding) + vdb = faiss_service.load_resource() - # save created docstore - index_name_embeddings = "embeddings_index" - embeddings_docstore.save_local(TMP_DIR_PATH, index_name_embeddings) + query_vec = await 
faiss_service.embeddings.aembed_query("22") - # attempt to load created docstore - load_attempt = faiss_service.has_store_object(index_name_embeddings) + k_1 = await vdb.similarity_search(embeddings=[query_vec], k=1) - assert load_attempt is True + assert len(k_1[0]) == 1 + assert k_1[0][0]["page_content"] == "22" - # Test creating docstore from texts - test_texts = ["for", "the", "test"] - texts_docstore = faiss_service.create(name=docstore_name, texts=test_texts) + k_3 = await vdb.similarity_search(embeddings=[query_vec], k=3) - # save created docstore - index_name_texts = "texts_index" - texts_docstore.save_local(TMP_DIR_PATH, index_name_texts) + assert len(k_3[0]) == 3 + assert k_3[0][0]["page_content"] == "22" - # attempt to load created docstore - load_attempt = faiss_service.has_store_object(index_name_texts) + # Exceed the number of documents in the docstore + k_5 = await vdb.similarity_search(embeddings=[query_vec], k=vdb.count() + 2) - assert load_attempt is True + assert len(k_5[0]) == vdb.count() + assert k_5[0][0]["page_content"] == "22" - # Test creating docstore from documents - test_documents = [Document(page_content="This is for the test.")] - docs_docstore = faiss_service.create(name=docstore_name, documents=test_documents) - # save created docstore - index_name_docs = "docs_index" - docs_docstore.save_local(TMP_DIR_PATH, index_name_docs) +def test_has_store_object(faiss_service: FaissVectorDBService): + assert faiss_service.has_store_object("index") - # attempt to load created docstore - load_attempt = faiss_service.has_store_object(index_name_docs) + assert faiss_service.has_store_object("other_index") - assert load_attempt is True + assert not faiss_service.has_store_object("not_an_index") From 564ece33fa38c7b76d122543ce9b37d9a3d4fbec Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 11 Jun 2024 16:54:47 -0700 Subject: [PATCH 10/12] Loosening hotfix restrictions --- conda/environments/all_cuda-121_arch-x86_64.yaml | 5 +++-- conda/environments/examples_cuda-121_arch-x86_64.yaml | 6 +++--- dependencies.yaml | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index 267efb662f..f320d3ac86 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -117,13 +117,14 @@ dependencies: - pip: - --find-links https://data.dgl.ai/wheels-test/repo.html - --find-links https://data.dgl.ai/wheels/cu121/repo.html + - PyMuPDF==1.23.* - PyMuPDF==1.23.21 - databricks-cli < 0.100 - databricks-connect - dgl==2.0.0 - dglgo - - faiss-gpu==1.7 - - google-search-results==2.4 + - faiss-gpu==1.7.* + - google-search-results==2.4.* - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.16 - milvus==2.3.5 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index a56c41c20b..cda5d37df4 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -61,13 +61,13 @@ dependencies: - pip: - --find-links https://data.dgl.ai/wheels-test/repo.html - --find-links https://data.dgl.ai/wheels/cu121/repo.html - - PyMuPDF==1.23.21 + - PyMuPDF==1.23.* - databricks-cli < 0.100 - databricks-connect - dgl==2.0.0 - dglgo - - faiss-gpu==1.7 - - google-search-results==2.4 + - faiss-gpu==1.7.* + - google-search-results==2.4.* - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.16 - 
milvus==2.3.5 diff --git a/dependencies.yaml b/dependencies.yaml index 9011d0b974..8d41be4f50 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -324,12 +324,12 @@ dependencies: - sentence-transformers=2.7 - pip - pip: - - faiss-gpu==1.7 - - google-search-results==2.4 + - faiss-gpu==1.7.* + - google-search-results==2.4.* - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.16 - nemollm==0.3.5 - - PyMuPDF==1.23.21 + - PyMuPDF==1.23.* model-training-tuning: common: From 5611fc0bf2ccf0321046289b7b0d0a438c1a47b3 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Wed, 12 Jun 2024 09:09:57 -0700 Subject: [PATCH 11/12] Fixing OpenAI tests --- morpheus.code-workspace | 2 +- morpheus/llm/services/openai_chat_service.py | 25 ++ tests/llm/services/test_openai_chat_client.py | 151 ------------ .../llm/services/test_openai_chat_service.py | 230 ++++++++++++++++-- 4 files changed, 231 insertions(+), 177 deletions(-) delete mode 100644 tests/llm/services/test_openai_chat_client.py diff --git a/morpheus.code-workspace b/morpheus.code-workspace index f81d904f8e..9e25e5b414 100644 --- a/morpheus.code-workspace +++ b/morpheus.code-workspace @@ -695,7 +695,7 @@ "tests" ], "python.testing.pytestEnabled": true, - "python.testing.unittestEnabled": true, + "python.testing.unittestEnabled": false, "rewrap.wrappingColumn": 120, "testMate.cpp.debug.configTemplate": { "args": "${argsArray}", diff --git a/morpheus/llm/services/openai_chat_service.py b/morpheus/llm/services/openai_chat_service.py index 8fe1919a90..3b2c87b4f2 100644 --- a/morpheus/llm/services/openai_chat_service.py +++ b/morpheus/llm/services/openai_chat_service.py @@ -137,6 +137,31 @@ def __init__(self, api_key=self._parent._api_key.value, base_url=self._parent._base_url.value) + @property + def model_name(self): + """ + Get the name of the model associated with this client. + + Returns + ------- + str + The name of the model. + """ + return self._model_name + + @property + def model_kwargs(self): + """ + Get the keyword args that will be passed to the model when calling generation functions. + + Returns + ------- + dict + The keyword arguments dictionary. + """ + # Return a copy to avoid modification of the original + return self._model_kwargs.copy() + def get_input_names(self) -> list[str]: input_names = [self._prompt_key] if self._set_assistant: diff --git a/tests/llm/services/test_openai_chat_client.py b/tests/llm/services/test_openai_chat_client.py deleted file mode 100644 index 628274f68b..0000000000 --- a/tests/llm/services/test_openai_chat_client.py +++ /dev/null @@ -1,151 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import asyncio -from unittest import mock - -import pytest - -from _utils.llm import mk_mock_openai_response -from morpheus.llm.services.openai_chat_service import OpenAIChatService - - -@pytest.mark.parametrize("api_key", ["12345", None]) -@pytest.mark.parametrize("base_url", ["http://test.openai.com/v1", None]) -@pytest.mark.parametrize("max_retries", [5, 10]) -def test_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], - api_key: str, - base_url: str, - max_retries: int): - OpenAIChatService(api_key=api_key, base_url=base_url).get_client(model_name="test_model", max_retries=max_retries) - - for mock_client in mock_chat_completion: - mock_client.assert_called_once_with(api_key=api_key, base_url=base_url, max_retries=max_retries) - - -@pytest.mark.parametrize("max_retries", [5, 10]) -def test_constructor_default_service_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], - max_retries: int): - OpenAIChatService().get_client(model_name="test_model", max_retries=max_retries) - - for mock_client in mock_chat_completion: - mock_client.assert_called_once_with(max_retries=max_retries, organization=None, api_key=None, base_url=None) - - -@pytest.mark.parametrize("use_async", [True, False]) -@pytest.mark.parametrize( - "input_dict, set_assistant, expected_messages", - [({ - "prompt": "test_prompt", "assistant": "assistant_response" - }, - True, [{ - "role": "user", "content": "test_prompt" - }, { - "role": "assistant", "content": "assistant_response" - }]), ({ - "prompt": "test_prompt" - }, False, [{ - "role": "user", "content": "test_prompt" - }])]) -@pytest.mark.parametrize("temperature", [0, 1, 2]) -def test_generate(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], - use_async: bool, - input_dict: dict[str, str], - set_assistant: bool, - expected_messages: list[dict], - temperature: int): - (mock_client, mock_async_client) = mock_chat_completion - client = OpenAIChatService().get_client(model_name="test_model", - set_assistant=set_assistant, - temperature=temperature) - - if use_async: - results = asyncio.run(client.generate_async(**input_dict)) - mock_async_client.chat.completions.create.assert_called_once_with(model="test_model", - messages=expected_messages, - temperature=temperature) - mock_client.chat.completions.create.assert_not_called() - - else: - results = client.generate(**input_dict) - mock_client.chat.completions.create.assert_called_once_with(model="test_model", - messages=expected_messages, - temperature=temperature) - mock_async_client.chat.completions.create.assert_not_called() - - assert results == "test_output" - - -@pytest.mark.parametrize("use_async", [True, False]) -@pytest.mark.parametrize("inputs, set_assistant, expected_messages", - [({ - "prompt": ["prompt1", "prompt2"], "assistant": ["assistant1", "assistant2"] - }, - True, - [[{ - "role": "user", "content": "prompt1" - }, { - "role": "assistant", "content": "assistant1" - }], [{ - "role": "user", "content": "prompt2" - }, { - "role": "assistant", "content": "assistant2" - }]]), - ({ - "prompt": ["prompt1", "prompt2"] - }, - False, [[{ - "role": "user", "content": "prompt1" - }], [{ - "role": "user", "content": "prompt2" - }]])]) -@pytest.mark.parametrize("temperature", [0, 1, 2]) -def test_generate_batch(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], - use_async: bool, - inputs: dict[str, list[str]], - set_assistant: bool, - expected_messages: list[list[dict]], - temperature: int): - (mock_client, mock_async_client) = mock_chat_completion - 
client = OpenAIChatService().get_client(model_name="test_model", - set_assistant=set_assistant, - temperature=temperature) - - expected_results = ["test_output" for _ in range(len(inputs["prompt"]))] - expected_calls = [ - mock.call(model="test_model", messages=messages, temperature=temperature) for messages in expected_messages - ] - - if use_async: - results = asyncio.run(client.generate_batch_async(inputs)) - mock_async_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) - mock_client.chat.completions.create.assert_not_called() - - else: - results = client.generate_batch(inputs) - mock_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) - mock_async_client.chat.completions.create.assert_not_called() - - assert results == expected_results - - -@pytest.mark.parametrize("completion", [[], [None]], ids=["no_choices", "no_content"]) -@pytest.mark.usefixtures("mock_chat_completion") -def test_extract_completion_errors(completion: list): - client = OpenAIChatService().get_client(model_name="test_model") - mock_completion = mk_mock_openai_response(completion) - - with pytest.raises(ValueError): - client._extract_completion(mock_completion) diff --git a/tests/llm/services/test_openai_chat_service.py b/tests/llm/services/test_openai_chat_service.py index f3adc1023a..54b4290ded 100644 --- a/tests/llm/services/test_openai_chat_service.py +++ b/tests/llm/services/test_openai_chat_service.py @@ -13,50 +13,230 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio +import os from unittest import mock import pytest -from morpheus.llm.services.llm_service import LLMService -from morpheus.llm.services.openai_chat_service import OpenAIChatClient +from _utils.llm import mk_mock_openai_response from morpheus.llm.services.openai_chat_service import OpenAIChatService -def test_constructor(): - service = OpenAIChatService() - assert isinstance(service, LLMService) +@pytest.fixture(name="set_default_openai_api_key", autouse=True, scope="function") +def set_default_openai_api_key_fixture(): + # Must have an API key set to create the openai client + with mock.patch.dict(os.environ, clear=True, values={"OPENAI_API_KEY": "testing_api_key"}): + yield + + +def assert_called_once_with_relaxed(mock_obj, *args, **kwargs): + + if (len(mock_obj.call_args_list) == 1): + + recent_call = mock_obj.call_args_list[-1] + + # Ensure that the number of arguments matches by adding ANY to the back of the args + if (len(args) < len(recent_call.args)): + args = tuple(list(args) + [mock.ANY] * (len(recent_call.args) - len(args))) + + addl_kwargs = {key: mock.ANY for key in recent_call.kwargs.keys() if key not in kwargs} + + kwargs.update(addl_kwargs) + + mock_obj.assert_called_once_with(*args, **kwargs) + + +@pytest.mark.parametrize("api_key", ["12345", None]) +@pytest.mark.parametrize("base_url", ["http://test.openai.com/v1", None]) +@pytest.mark.parametrize("org_id", ["my-org-124", None]) +def test_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], + api_key: str, + base_url: str, + org_id: str): + + OpenAIChatService(api_key=api_key, base_url=base_url, org_id=org_id).get_client(model_name="test_model") + + if (api_key is None): + api_key = os.environ["OPENAI_API_KEY"] + + for mock_client in mock_chat_completion: + assert_called_once_with_relaxed(mock_client, organization=org_id, api_key=api_key, base_url=base_url) + + +@pytest.mark.parametrize("max_retries", [5, 10, -1, None]) +def 
test_max_retries(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], max_retries: int): + OpenAIChatService().get_client(model_name="test_model", max_retries=max_retries) + + for mock_client in mock_chat_completion: + assert_called_once_with_relaxed(mock_client, max_retries=max_retries) + + +@pytest.mark.parametrize("use_json", [True, False]) +def test_client_json(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], use_json: bool): + client = OpenAIChatService().get_client(model_name="test_model", json=use_json) + + # Perform a dummy generate call + client.generate(prompt="test_prompt") + + if (use_json): + assert_called_once_with_relaxed(mock_chat_completion[0].chat.completions.create, + response_format={"type": "json_object"}) + else: + assert mock_chat_completion[0].chat.completions.create.call_args_list[-1].kwargs.get("response_format") is None + + +@pytest.mark.parametrize("set_assistant", [True, False]) +def test_client_set_assistant(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], set_assistant: bool): + client = OpenAIChatService().get_client(model_name="test_model", set_assistant=set_assistant) + + # Perform a dummy generate call + client.generate(prompt="test_prompt", assistant="assistant_message") + + messages = mock_chat_completion[0].chat.completions.create.call_args_list[-1].kwargs["messages"] + + found_assistant = False + + for message in messages: + if (message.get("role") == "assistant"): + found_assistant = True + break + + assert found_assistant == set_assistant + + +@pytest.mark.parametrize("use_async", [True, False]) +@pytest.mark.parametrize( + "input_dict, set_assistant, expected_messages", + [({ + "prompt": "test_prompt", "assistant": "assistant_response" + }, + True, [{ + "role": "user", "content": "test_prompt" + }, { + "role": "assistant", "content": "assistant_response" + }]), ({ + "prompt": "test_prompt" + }, False, [{ + "role": "user", "content": "test_prompt" + }])]) +@pytest.mark.parametrize("temperature", [0, 1, 2]) +def test_generate(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], + use_async: bool, + input_dict: dict[str, str], + set_assistant: bool, + expected_messages: list[dict], + temperature: int): + (mock_client, mock_async_client) = mock_chat_completion + client = OpenAIChatService().get_client(model_name="test_model", + set_assistant=set_assistant, + temperature=temperature) + + if use_async: + results = asyncio.run(client.generate_async(**input_dict)) + mock_async_client.chat.completions.create.assert_called_once_with(model="test_model", + messages=expected_messages, + temperature=temperature) + mock_client.chat.completions.create.assert_not_called() + + else: + results = client.generate(**input_dict) + mock_client.chat.completions.create.assert_called_once_with(model="test_model", + messages=expected_messages, + temperature=temperature) + mock_async_client.chat.completions.create.assert_not_called() + + assert results == "test_output" + + +@pytest.mark.parametrize("use_async", [True, False]) +@pytest.mark.parametrize("inputs, set_assistant, expected_messages", + [({ + "prompt": ["prompt1", "prompt2"], "assistant": ["assistant1", "assistant2"] + }, + True, + [[{ + "role": "user", "content": "prompt1" + }, { + "role": "assistant", "content": "assistant1" + }], [{ + "role": "user", "content": "prompt2" + }, { + "role": "assistant", "content": "assistant2" + }]]), + ({ + "prompt": ["prompt1", "prompt2"] + }, + False, [[{ + "role": "user", "content": "prompt1" + }], [{ + "role": "user", "content": "prompt2" 
+ }]])]) +@pytest.mark.parametrize("temperature", [0, 1, 2]) +def test_generate_batch(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], + use_async: bool, + inputs: dict[str, list[str]], + set_assistant: bool, + expected_messages: list[list[dict]], + temperature: int): + (mock_client, mock_async_client) = mock_chat_completion + client = OpenAIChatService().get_client(model_name="test_model", + set_assistant=set_assistant, + temperature=temperature) + + expected_results = ["test_output" for _ in range(len(inputs["prompt"]))] + expected_calls = [ + mock.call(model="test_model", messages=messages, temperature=temperature) for messages in expected_messages + ] + + if use_async: + results = asyncio.run(client.generate_batch_async(inputs)) + mock_async_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) + mock_client.chat.completions.create.assert_not_called() + + else: + results = client.generate_batch(inputs) + mock_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) + mock_async_client.chat.completions.create.assert_not_called() + + assert results == expected_results + + +@pytest.mark.parametrize("completion", [[], [None]], ids=["no_choices", "no_content"]) +@pytest.mark.usefixtures("mock_chat_completion") +def test_extract_completion_errors(completion: list): + client = OpenAIChatService().get_client(model_name="test_model") + mock_completion = mk_mock_openai_response(completion) + + with pytest.raises(ValueError): + client._extract_completion(mock_completion) def test_get_client(): service = OpenAIChatService() client = service.get_client(model_name="test_model") - assert isinstance(client, OpenAIChatClient) + assert client.model_name == "test_model" + + client = service.get_client(model_name="test_model2", extra_arg="test_arg") + + assert client.model_name == "test_model2" + assert client.model_kwargs == {"extra_arg": "test_arg"} -@pytest.mark.parametrize("use_json", [True, False]) -@pytest.mark.parametrize("set_assistant", [True, False]) @pytest.mark.parametrize("temperature", [0, 1, 2]) @pytest.mark.parametrize("max_retries", [5, 10]) -@mock.patch("morpheus.llm.services.openai_chat_service.OpenAIChatClient") -def test_get_client_passed_args(mock_client: mock.MagicMock, - set_assistant: bool, - use_json: bool, +def test_get_client_passed_args(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], temperature: int, max_retries: int): service = OpenAIChatService() - service.get_client(model_name="test_model", - set_assistant=set_assistant, - json=use_json, - temperature=temperature, - test='this', - max_retries=max_retries) + client = service.get_client(model_name="test_model", temperature=temperature, test='this', max_retries=max_retries) + + # Perform a dummy generate call + client.generate(prompt="test_prompt") # Ensure the get_client method passed on the set_assistant and model kwargs - mock_client.assert_called_once_with(service, - model_name="test_model", - set_assistant=set_assistant, - json=use_json, - temperature=temperature, - test='this', - max_retries=max_retries) + assert_called_once_with_relaxed(mock_chat_completion[0].chat.completions.create, + model="test_model", + temperature=temperature, + test='this') From aeb05446d7651dd8fa459699d52a1657dac2b47c Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Wed, 12 Jun 2024 09:49:26 -0700 Subject: [PATCH 12/12] Fixing tests --- .../llm/services/nvfoundation_llm_service.py | 5 ++- tests/llm/services/test_llm_service_pipe.py | 33 ++++++++------ ...nt.py => 
test_nvfoundation_llm_service.py} | 44 +++++++++--------- tests/llm/test_completion_pipe.py | 45 +++++++++---------- 4 files changed, 65 insertions(+), 62 deletions(-) rename tests/llm/services/{test_nvfoundation_llm_client.py => test_nvfoundation_llm_service.py} (75%) diff --git a/morpheus/llm/services/nvfoundation_llm_service.py b/morpheus/llm/services/nvfoundation_llm_service.py index 5154158538..62bc355662 100644 --- a/morpheus/llm/services/nvfoundation_llm_service.py +++ b/morpheus/llm/services/nvfoundation_llm_service.py @@ -207,7 +207,6 @@ class NVFoundationLLMService(LLMService): class APIKey(EnvConfigValue): _ENV_KEY: str = "NVIDIA_API_KEY" - _ALLOW_NONE: bool = True class BaseURL(EnvConfigValue): _ENV_KEY: str = "NVIDIA_API_BASE" @@ -236,6 +235,10 @@ def _merge_model_kwargs(self, model_kwargs: dict) -> dict: def api_key(self): return self._api_key.value + @property + def base_url(self): + return self._base_url.value + def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClient: """ Returns a client for interacting with a specific model. This method is the preferred way to create a client. diff --git a/tests/llm/services/test_llm_service_pipe.py b/tests/llm/services/test_llm_service_pipe.py index e6e2f8bbf3..13fb5f652e 100644 --- a/tests/llm/services/test_llm_service_pipe.py +++ b/tests/llm/services/test_llm_service_pipe.py @@ -18,12 +18,13 @@ import cudf from _utils import assert_results +from _utils.environment import set_env from _utils.llm import mk_mock_openai_response from morpheus.config import Config from morpheus.llm import LLMEngine from morpheus.llm.nodes.extracter_node import ExtracterNode from morpheus.llm.nodes.llm_generate_node import LLMGenerateNode -from morpheus.llm.services.llm_service import LLMService +from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.nemo_llm_service import NeMoLLMService from morpheus.llm.services.openai_chat_service import OpenAIChatService from morpheus.llm.task_handlers.simple_task_handler import SimpleTaskHandler @@ -35,22 +36,17 @@ from morpheus.stages.preprocess.deserialize_stage import DeserializeStage -def _build_engine(llm_service_cls: type[LLMService]): - llm_service = llm_service_cls() - llm_clinet = llm_service.get_client(model_name="test_model") +def _build_engine(llm_client: LLMClient): engine = LLMEngine() engine.add_node("extracter", node=ExtracterNode()) - engine.add_node("completion", inputs=["/extracter"], node=LLMGenerateNode(llm_client=llm_clinet)) + engine.add_node("completion", inputs=["/extracter"], node=LLMGenerateNode(llm_client=llm_client)) engine.add_task_handler(inputs=["/completion"], handler=SimpleTaskHandler()) return engine -def _run_pipeline(config: Config, - llm_service_cls: type[LLMService], - country_prompts: list[str], - capital_responses: list[str]): +def _run_pipeline(config: Config, llm_client: LLMClient, country_prompts: list[str], capital_responses: list[str]): """ Loosely patterned after `examples/llm/completion` """ @@ -66,7 +62,7 @@ def _run_pipeline(config: Config, pipe.add_stage( DeserializeStage(config, message_type=ControlMessage, task_type="llm_engine", task_payload=completion_task)) - pipe.add_stage(LLMEngineStage(config, engine=_build_engine(llm_service_cls))) + pipe.add_stage(LLMEngineStage(config, engine=_build_engine(llm_client))) sink = pipe.add_stage(CompareDataFrameStage(config, compare_df=expected_df)) pipe.run() @@ -79,7 +75,13 @@ def test_completion_pipe_nemo(config: Config, country_prompts: list[str], capital_responses: 
list[str]): mock_nemollm.post_process_generate_response.side_effect = [{"text": response} for response in capital_responses] - _run_pipeline(config, NeMoLLMService, country_prompts, capital_responses) + + # Set a dummy key to bypass the API key check + with set_env(NGC_API_KEY="test"): + + llm_client = NeMoLLMService().get_client(model_name="test_model") + + _run_pipeline(config, llm_client, country_prompts, capital_responses) def test_completion_pipe_openai(config: Config, @@ -91,7 +93,10 @@ def test_completion_pipe_openai(config: Config, mk_mock_openai_response([response]) for response in capital_responses ] - _run_pipeline(config, OpenAIChatService, country_prompts, capital_responses) + with set_env(OPENAI_API_KEY="test"): + llm_client = OpenAIChatService().get_client(model_name="test_model") + + _run_pipeline(config, llm_client, country_prompts, capital_responses) - mock_client.chat.completions.create.assert_not_called() - mock_async_client.chat.completions.create.assert_called() + mock_client.chat.completions.create.assert_not_called() + mock_async_client.chat.completions.create.assert_called() diff --git a/tests/llm/services/test_nvfoundation_llm_client.py b/tests/llm/services/test_nvfoundation_llm_service.py similarity index 75% rename from tests/llm/services/test_nvfoundation_llm_client.py rename to tests/llm/services/test_nvfoundation_llm_service.py index dc02c1836a..dec76060e8 100644 --- a/tests/llm/services/test_nvfoundation_llm_client.py +++ b/tests/llm/services/test_nvfoundation_llm_service.py @@ -25,23 +25,24 @@ from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMService -@pytest.mark.usefixtures("restore_environ") -@pytest.mark.parametrize("api_key", [None, "test_api_key"]) -@pytest.mark.parametrize("set_env", [True, False]) -def test_constructor(api_key: str, set_env: bool): - """ - Test that the constructor prefers explicit arguments over environment variables. 
- """ - env_api_key = "test_env_api_key" +@pytest.fixture(name="set_default_nvidia_api_key", autouse=True, scope="function") +def set_default_nvidia_api_key_fixture(): + # Must have an API key set to create the openai client + with mock.patch.dict(os.environ, clear=True, values={"NVIDIA_API_KEY": "nvapi-testing_api_key"}): + yield - if set_env: - os.environ["NVIDIA_API_KEY"] = env_api_key - service = NVFoundationLLMService(api_key=api_key) +@pytest.mark.parametrize("api_key", ["nvapi-12345", None]) +@pytest.mark.parametrize("base_url", ["http://test.nvidia.com/v1", None]) +def test_constructor(api_key: str, base_url: bool): - expected_api_key = api_key if "NVIDIA_API_KEY" not in os.environ else env_api_key + service = NVFoundationLLMService(api_key=api_key, base_url=base_url) - assert service.api_key == expected_api_key + if (api_key is None): + api_key = os.environ["NVIDIA_API_KEY"] + + assert service.api_key == api_key + assert service.base_url == base_url def test_get_client(): @@ -61,7 +62,7 @@ def test_model_kwargs(): def test_get_input_names(): - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model", additional_arg="test_arg") + client = NVFoundationLLMService().get_client(model_name="test_model", additional_arg="test_arg") assert client.get_input_names() == ["prompt"] @@ -76,7 +77,7 @@ def mock_generation_side_effect(*_, **kwargs): mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") assert client.generate(prompt="test_prompt") == "test_prompt" @@ -90,7 +91,7 @@ def mock_generation_side_effect(*_, **kwargs): mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") assert client.generate_batch({'prompt': ["prompt1", "prompt2"]}) == ["prompt1", "prompt2"] @@ -105,7 +106,7 @@ def mock_generation_side_effect(*_, **kwargs): mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") assert await client.generate_async(prompt="test_prompt") == "test_prompt" @@ -120,7 +121,7 @@ def mock_generation_side_effect(*_, **kwargs): mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") assert await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}) @@ -129,12 +130,11 @@ async def test_generate_batch_async_error(): with mock.patch("langchain_nvidia_ai_endpoints.ChatNVIDIA.agenerate_prompt", autospec=True) as mock_nvfoundationllm: def mock_generation_side_effect(*_, **kwargs): - return LLMResult(generations=[[ChatGeneration(message=ChatMessage(content=x.text, role="assistant"))] - for x in kwargs["prompts"]]) + raise RuntimeError("unittest") mock_nvfoundationllm.side_effect = mock_generation_side_effect - client = NVFoundationLLMService(api_key="nvapi-...").get_client(model_name="test_model") + client = NVFoundationLLMService().get_client(model_name="test_model") with pytest.raises(RuntimeError, match="unittest"): - await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}) + 
await client.generate_batch_async({'prompt': ["prompt1", "prompt2"]}, return_exceptions=False) diff --git a/tests/llm/test_completion_pipe.py b/tests/llm/test_completion_pipe.py index 106eb39586..e57e36f09f 100644 --- a/tests/llm/test_completion_pipe.py +++ b/tests/llm/test_completion_pipe.py @@ -28,7 +28,7 @@ from morpheus.llm.nodes.extracter_node import ExtracterNode from morpheus.llm.nodes.llm_generate_node import LLMGenerateNode from morpheus.llm.nodes.prompt_template_node import PromptTemplateNode -from morpheus.llm.services.llm_service import LLMService +from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.nemo_llm_service import NeMoLLMService from morpheus.llm.services.openai_chat_service import OpenAIChatService from morpheus.llm.task_handlers.simple_task_handler import SimpleTaskHandler @@ -42,9 +42,7 @@ logger = logging.getLogger(__name__) -def _build_engine(llm_service_cls: type[LLMService], model_name: str = "test_model"): - llm_service = llm_service_cls() - llm_client = llm_service.get_client(model_name=model_name) +def _build_engine(llm_client: LLMClient): engine = LLMEngine() engine.add_node("extracter", node=ExtracterNode()) @@ -57,11 +55,7 @@ def _build_engine(llm_service_cls: type[LLMService], model_name: str = "test_mod return engine -def _run_pipeline(config: Config, - llm_service_cls: type[LLMService], - countries: list[str], - capital_responses: list[str], - model_name: str = "test_model") -> dict: +def _run_pipeline(config: Config, llm_client: LLMClient, countries: list[str], capital_responses: list[str]) -> dict: """ Loosely patterned after `examples/llm/completion` """ @@ -81,7 +75,7 @@ def _run_pipeline(config: Config, task_type="llm_engine", task_payload=completion_task)) - pipe.add_stage(LLMEngineStage(config, engine=_build_engine(llm_service_cls, model_name=model_name))) + pipe.add_stage(LLMEngineStage(config, engine=_build_engine(llm_client))) sink = pipe.add_stage(CompareDataFrameStage(config, compare_df=expected_df)) @@ -99,8 +93,10 @@ def test_completion_pipe_nemo(config: Config, # Set a dummy key to bypass the API key check with set_env(NGC_API_KEY="test"): + llm_client = NeMoLLMService().get_client(model_name="test_model") + mock_nemollm.post_process_generate_response.side_effect = [{"text": response} for response in capital_responses] - results = _run_pipeline(config, NeMoLLMService, countries=countries, capital_responses=capital_responses) + results = _run_pipeline(config, llm_client, countries=countries, capital_responses=capital_responses) assert_results(results) @@ -114,20 +110,21 @@ def test_completion_pipe_openai(config: Config, mk_mock_openai_response([response]) for response in capital_responses ] - results = _run_pipeline(config, OpenAIChatService, countries=countries, capital_responses=capital_responses) - assert_results(results) - mock_client.chat.completions.create.assert_not_called() - mock_async_client.chat.completions.create.assert_called() + with set_env(OPENAI_API_KEY="test"): + llm_client = OpenAIChatService().get_client(model_name="test_model") + + results = _run_pipeline(config, llm_client, countries=countries, capital_responses=capital_responses) + assert_results(results) + mock_client.chat.completions.create.assert_not_called() + mock_async_client.chat.completions.create.assert_called() @pytest.mark.usefixtures("nemollm") @pytest.mark.usefixtures("ngc_api_key") def test_completion_pipe_integration_nemo(config: Config, countries: list[str], capital_responses: list[str]): - results = 
_run_pipeline(config,
-                             NeMoLLMService,
-                             countries=countries,
-                             capital_responses=capital_responses,
-                             model_name="gpt-43b-002")
+    llm_client = NeMoLLMService().get_client(model_name="gpt-43b-002")
+
+    results = _run_pipeline(config, llm_client, countries=countries, capital_responses=capital_responses)
     assert results['diff_cols'] == 0
     assert results['total_rows'] == len(countries)
     assert results['matching_rows'] + results['diff_rows'] == len(countries)
@@ -136,11 +133,9 @@ def test_completion_pipe_integration_nemo(config: Config, countries: list[str],
 @pytest.mark.usefixtures("openai")
 @pytest.mark.usefixtures("openai_api_key")
 def test_completion_pipe_integration_openai(config: Config, countries: list[str], capital_responses: list[str]):
-    results = _run_pipeline(config,
-                            OpenAIChatService,
-                            countries=countries,
-                            capital_responses=capital_responses,
-                            model_name="gpt-3.5-turbo")
+    llm_client = OpenAIChatService().get_client(model_name="gpt-3.5-turbo")
+
+    results = _run_pipeline(config, llm_client, countries=countries, capital_responses=capital_responses)
     assert results['diff_cols'] == 0
     assert results['total_rows'] == len(countries)
     assert results['matching_rows'] + results['diff_rows'] == len(countries)
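The pattern these last three patches converge on: a service object owns the credentials and defaults, and `get_client()` binds a model name plus per-client keyword arguments. A minimal sketch of that flow, assuming `morpheus` and the `openai` package are installed, with a dummy key in the style of the test fixtures above:

    import os

    from morpheus.llm.services.openai_chat_service import OpenAIChatService

    # A dummy key is enough to construct the client; no request is sent here.
    os.environ["OPENAI_API_KEY"] = "test"

    service = OpenAIChatService()
    client = service.get_client(model_name="test_model2", extra_arg="test_arg")

    # The read-only properties added in patch 11 expose the bound configuration.
    assert client.model_name == "test_model2"
    assert client.model_kwargs == {"extra_arg": "test_arg"}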