LLMService Creation Changes #1631

Merged
13 changes: 10 additions & 3 deletions morpheus/_lib/src/llm/utils.cpp
@@ -112,9 +112,16 @@ input_mappings_t process_input_names(user_input_mappings_t user_inputs, const st

if (found_star_input_name != found_star_node_name)
{
-    throw std::invalid_argument(
-        "LLMNode::add_node() called with a placeholder input name and node name that "
-        "do not match");
+    if (found_star_input_name)
+    {
+        throw std::invalid_argument(
+            "LLMNode::add_node() called with a placeholder external name but no placeholder internal name");
+    }
+    else
+    {
+        throw std::invalid_argument(
+            "LLMNode::add_node() called with a placeholder internal name but no placeholder external name");
+    }
}
else if (found_star_input_name && found_star_node_name)
{
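The split error message makes it clear which side of an input mapping lacked the placeholder. A minimal sketch of how this surfaces from Python, assuming the `morpheus.llm` bindings (`LLMNode.add_node`, `LLMLambdaNode`) behave as in the mainline API; the node wiring and names are illustrative, not part of this PR:

```python
from morpheus.llm import LLMLambdaNode
from morpheus.llm import LLMNode


async def echo(prompt: str) -> str:
    # Trivial inner node; its single input name is inferred as "prompt".
    return prompt


node = LLMNode()

# Placeholders on both sides are valid: "*" expands to the inner node's input names.
node.add_node("echo", inputs=[("/extracter/*", "*")], node=LLMLambdaNode(echo))

# A placeholder on only one side now raises the more specific message, e.g.
# "LLMNode::add_node() called with a placeholder external name but no placeholder internal name".
# node.add_node("bad", inputs=[("/extracter/*", "prompt")], node=LLMLambdaNode(echo))
```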
54 changes: 54 additions & 0 deletions morpheus/llm/services/llm_service.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import importlib
import logging
import typing
from abc import ABC
@@ -133,3 +134,56 @@ def get_client(self, *, model_name: str, **model_kwargs) -> LLMClient:
Additional keyword arguments to pass to the model.
"""
pass

@typing.overload
@staticmethod
def create(service_type: typing.Literal["nemo"], *service_args,
**service_kwargs) -> "morpheus.llm.services.nemo_llm_service.NeMoLLMService":
pass

@typing.overload
@staticmethod
def create(service_type: typing.Literal["openai"], *service_args,
**service_kwargs) -> "morpheus.llm.services.openai_chat_service.OpenAIChatService":
pass

@typing.overload
@staticmethod
def create(service_type: str, *service_args, **service_kwargs) -> "LLMService":
pass

@staticmethod
def create(service_type: str | typing.Literal["nemo"] | typing.Literal["openai"], *service_args, **service_kwargs):
"""
Returns a service for interacting with LLM models.

Parameters
----------
service_type : str
The type of the service to create.
service_args : typing.Any
Positional arguments to pass to the service constructor.
service_kwargs : dict[str, typing.Any]
Additional keyword arguments to pass to the service constructor.
"""
if service_type.lower() == 'openai':
llm_or_chat = "chat"
else:
llm_or_chat = "llm"

module_name = f"morpheus.llm.services.{service_type.lower()}_{llm_or_chat}_service"
module = importlib.import_module(module_name)

# Get all of the classes in the module to find the correct service class
mod_classes = {name: cls for name, cls in module.__dict__.items() if isinstance(cls, type)}

class_name_lower = f"{service_type}{llm_or_chat}Service".lower()

# Find case-insensitive match for the class name
matching_classes = [name for name in mod_classes if name.lower() == class_name_lower]

assert len(matching_classes) == 1, f"Expected to find exactly one class with name {class_name_lower} in module {module_name}, but found {matching_classes}"

# Create the class
class_ = getattr(module, matching_classes[0])

instance = class_(*service_args, **service_kwargs)

return instance
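As a usage sketch (the keyword arguments shown are illustrative, not mandated by this PR), `create()` locates the implementation purely by naming convention: module `morpheus.llm.services.{service_type}_{llm|chat}_service`, then a case-insensitive class match on `{service_type}{llm|chat}service`:

```python
from morpheus.llm.services.llm_service import LLMService

# Resolves to morpheus.llm.services.nemo_llm_service.NeMoLLMService; the
# overloads above give type checkers the concrete return type.
nemo_service = LLMService.create("nemo", retry_count=3)

# "openai" is special-cased to the chat module, so this resolves to
# morpheus.llm.services.openai_chat_service.OpenAIChatService.
openai_service = LLMService.create("openai")
```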
8 changes: 6 additions & 2 deletions morpheus/llm/services/nemo_llm_service.py
@@ -190,7 +190,7 @@ class NeMoLLMService(LLMService):
A service for interacting with NeMo LLM models; this class should be used to create a client for a specific model.
"""

-def __init__(self, *, api_key: str = None, org_id: str = None, retry_count=5) -> None:
+def __init__(self, *, api_key: str = None, org_id: str = None, base_url: str = None, retry_count=5) -> None:
"""
Creates a service for interacting with NeMo LLM models.

@@ -203,6 +203,9 @@ def __init__(self, *, api_key: str = None, org_id: str = None, retry_count=5) ->
The organization ID for the LLM service, by default None. If `None` the organization ID will be read from
the `NGC_ORG_ID` environment variable. This value is only required if the account associated with the
`api_key` is a member of multiple NGC organizations.
base_url : str, optional
The API host URL, by default None. If `None` the URL will be read from the `NGC_API_BASE` environment
variable. If neither is present an error will be raised.
retry_count : int, optional
The number of times to retry a request before raising an exception, by default 5

@@ -214,11 +217,12 @@ def __init__(self, *, api_key: str = None, org_id: str = None, retry_count=5) ->
super().__init__()
api_key = api_key if api_key is not None else os.environ.get("NGC_API_KEY", None)
org_id = org_id if org_id is not None else os.environ.get("NGC_ORG_ID", None)
base_url = base_url if base_url is not None else os.environ.get("NGC_API_BASE", None)

self._retry_count = retry_count

self._conn = nemollm.NemoLLM(
-api_host=os.environ.get("NGC_API_BASE", None),
+api_host=base_url,
# The client must configure the authentication and authorization parameters
# in accordance with the API server security policy.
# Configure Bearer authorization
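Since `base_url` now feeds `api_host` directly, callers can point the service at a non-default NGC endpoint. A minimal sketch, assuming `NGC_API_KEY` is set and the `nemollm` dependency is installed (the endpoint URL is illustrative):

```python
import os

from morpheus.llm.services.nemo_llm_service import NeMoLLMService

# An explicit base_url takes precedence; otherwise NGC_API_BASE is read from
# the environment, matching the resolution order in __init__ above.
service = NeMoLLMService(
    api_key=os.environ["NGC_API_KEY"],
    base_url="https://api.llm.ngc.nvidia.com/v1",  # illustrative endpoint
    retry_count=3,
)
```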
177 changes: 177 additions & 0 deletions morpheus/llm/services/nvfoundation_llm_service.py
@@ -0,0 +1,177 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import logging
import os
import typing

from morpheus.llm.services.llm_service import LLMClient
from morpheus.llm.services.llm_service import LLMService

logger = logging.getLogger(__name__)

IMPORT_EXCEPTION = None
IMPORT_ERROR_MESSAGE = (
"The `langchain-nvidia-ai-endpoints` package was not found. Install it and other additional dependencies by running the following command:\n"
"`conda env update --solver=libmamba -n morpheus "
"--file morpheus/conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`")

try:
from langchain_core.prompt_values import StringPromptValue
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints._common import NVEModel
except ImportError as import_exc:
IMPORT_EXCEPTION = import_exc


class NVFoundationLLMClient(LLMClient):
"""
Client for interacting with a specific model hosted on the NVIDIA AI Foundation endpoints. This class should be
constructed with the `NVFoundationLLMService.get_client` method.

Parameters
----------
parent : NVFoundationLLMService
The parent service for this client.
model_name : str
The name of the model to interact with.
model_kwargs : dict[str, typing.Any]
Additional keyword arguments to pass to the model when generating text.
"""

def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model_kwargs) -> None:
if IMPORT_EXCEPTION is not None:
raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION

super().__init__()

assert parent is not None, "Parent service cannot be None."

self._parent = parent
self._model_name = model_name
self._model_kwargs = model_kwargs
self._prompt_key = "prompt"

self._client = ChatNVIDIA(client=self._parent._nve_client, model=model_name, **model_kwargs)

def get_input_names(self) -> list[str]:
return [self._prompt_key]

def generate(self, **input_dict) -> str:
"""
Issue a request to generate a response based on a given prompt.

Parameters
----------
input_dict : dict
Input containing prompt data.
"""
return self.generate_batch({self._prompt_key: [input_dict[self._prompt_key]]})[0]

async def generate_async(self, **input_dict) -> str:
"""
Issue an asynchronous request to generate a response based on a given prompt.

Parameters
----------
input_dict : dict
Input containing prompt data.
"""

inputs = {self._prompt_key: [input_dict[self._prompt_key]]}

input_dict.pop(self._prompt_key)

return (await self.generate_batch_async(inputs=inputs, **input_dict))[0]

def generate_batch(self, inputs: dict[str, list], **kwargs) -> list[str]:
"""
Issue a request to generate a list of responses based on a list of prompts.

Parameters
----------
inputs : dict
Inputs containing prompt data.
"""
prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]]

final_kwargs = {**self._model_kwargs, **kwargs}

responses = self._client.generate_prompt(prompts=prompts, **final_kwargs)  # type: ignore

return [g[0].text for g in responses.generations]

async def generate_batch_async(self, inputs: dict[str, list], **kwargs) -> list[str]:
"""
Issue an asynchronous request to generate a list of responses based on a list of prompts.

Parameters
----------
inputs : dict
Inputs containing prompt data.
"""

prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]]

final_kwargs = {**self._model_kwargs, **kwargs}

responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore

return [g[0].text for g in responses.generations]


class NVFoundationLLMService(LLMService):
"""
A service for interacting with models hosted on the NVIDIA AI Foundation endpoints; this class should be used to
create a client for a specific model.

Parameters
----------
api_key : str, optional
The API key for the LLM service, by default None. If `None` the API key will be read from the `NGC_API_KEY`
environment variable. If neither are present an error will be raised.
base_url : str, optional
The API host URL, by default None. If `None` the URL will be read from the `NVIDIA_API_BASE` environment
variable. If neither is present, `https://api.nvcf.nvidia.com/v2` will be used.
"""

def __init__(self, *, api_key: str = None, base_url: str = None) -> None:
if IMPORT_EXCEPTION is not None:
raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION

super().__init__()

self._api_key = api_key
if base_url is None:
self._base_url = os.getenv('NVIDIA_API_BASE', 'https://api.nvcf.nvidia.com/v2')
else:
self._base_url = base_url

self._nve_client = NVEModel(
nvidia_api_key=self._api_key,
fetch_url_format=f"{self._base_url}/nvcf/pexec/status/",
call_invoke_base=f"{self._base_url}/nvcf/pexec/functions",
func_list_format=f"{self._base_url}/nvcf/functions",
) # type: ignore

def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClient:
"""
Returns a client for interacting with a specific model. This method is the preferred way to create a client.

Parameters
----------
model_name : str
The name of the model to create a client for.
model_kwargs : dict[str, typing.Any]
Additional keyword arguments to pass to the model when generating text.
"""

return NVFoundationLLMClient(self, model_name=model_name, **model_kwargs)
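Putting the new service together, a minimal end-to-end sketch (assuming `langchain-nvidia-ai-endpoints` is installed and an NGC API key is configured; the model name and sampling argument are illustrative):

```python
from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMService

service = NVFoundationLLMService()  # falls back to NVIDIA_API_BASE, then the nvcf default
client = service.get_client(model_name="mixtral_8x7b", temperature=0.2)

# generate_batch maps a list of prompts onto a list of completions via ChatNVIDIA.
completions = client.generate_batch({"prompt": ["What is Morpheus?", "What is NeMo?"]})
```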
15 changes: 12 additions & 3 deletions morpheus/llm/services/openai_chat_service.py
@@ -113,8 +113,8 @@ def __init__(self,
self._model_kwargs = copy.deepcopy(model_kwargs)

# Create the client objects for both sync and async
-self._client = openai.OpenAI(max_retries=max_retries)
-self._client_async = openai.AsyncOpenAI(max_retries=max_retries)
+self._client = openai.OpenAI(api_key=parent._api_key, base_url=parent._base_url, max_retries=max_retries)
+self._client_async = openai.AsyncOpenAI(api_key=parent._api_key, base_url=parent._base_url, max_retries=max_retries)

def get_input_names(self) -> list[str]:
input_names = [self._prompt_key]
@@ -316,12 +316,18 @@ class OpenAIChatService(LLMService):
A service for interacting with OpenAI Chat models; this class should be used to create clients.
"""

-def __init__(self, *, default_model_kwargs: dict = None) -> None:
+def __init__(self, *, api_key: str = None, base_url: str = None, default_model_kwargs: dict = None) -> None:
"""
Creates a service for interacting with OpenAI Chat models, this class should be used to create clients.

Parameters
----------
api_key : str, optional
The API key for the LLM service, by default None. If `None` the API key will be read from the
`OPENAI_API_KEY` environment variable. If neither are present an error will be raised.
base_url : str, optional
The API host URL, by default None. If `None` the URL will be read from the `OPENAI_API_BASE` environment
variable. If neither is present the OpenAI default will be used.
default_model_kwargs : dict, optional
Default arguments to use when creating a client via the `get_client` function. Any argument specified here
will automatically be used when calling `get_client`. Arguments specified in the `get_client` function will
@@ -338,6 +344,9 @@ def __init__(self, *, default_model_kwargs: dict = None) -> None:

super().__init__()

self._api_key = api_key
self._base_url = base_url

self._default_model_kwargs = default_model_kwargs or {}

self._logger = logging.getLogger(f"{__package__}.{OpenAIChatService.__name__}")
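With `api_key` and `base_url` plumbed through to both clients, the service can target any OpenAI-compatible endpoint. A hedged sketch (the key, URL, and model name are placeholders, not values from this PR):

```python
from morpheus.llm.services.openai_chat_service import OpenAIChatService

# api_key/base_url flow through to both openai.OpenAI and openai.AsyncOpenAI.
service = OpenAIChatService(
    api_key="sk-...",  # placeholder; normally read from OPENAI_API_KEY
    base_url="http://localhost:8000/v1",  # e.g. a local OpenAI-compatible server
    default_model_kwargs={"temperature": 0.0},
)
client = service.get_client(model_name="gpt-3.5-turbo")
```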
13 changes: 13 additions & 0 deletions morpheus/llm/services/utils/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.