LLMService Creation Changes #1631

Merged
13 changes: 10 additions & 3 deletions morpheus/_lib/src/llm/utils.cpp
@@ -112,9 +112,16 @@ input_mappings_t process_input_names(user_input_mappings_t user_inputs, const st

if (found_star_input_name != found_star_node_name)
{
-    throw std::invalid_argument(
-        "LLMNode::add_node() called with a placeholder input name and node name that "
-        "do not match");
+    if (found_star_input_name)
+    {
+        throw std::invalid_argument(
+            "LLMNode::add_node() called with a placeholder external name but no placeholder internal name");
+    }
+    else
+    {
+        throw std::invalid_argument(
+            "LLMNode::add_node() called with a placeholder internal name but no placeholder external name");
+    }
}
else if (found_star_input_name && found_star_node_name)
{
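The split error message makes it clear which side of an input mapping lacked the placeholder. A minimal sketch of how this surfaces from Python, assuming the `morpheus.llm` bindings (`LLMNode.add_node`, `LLMLambdaNode`) behave as in the mainline API; the node wiring and names are illustrative, not part of this PR:

```python
from morpheus.llm import LLMLambdaNode
from morpheus.llm import LLMNode


async def echo(prompt: str) -> str:
    # Trivial inner node; its single input name is inferred as "prompt".
    return prompt


node = LLMNode()

# Placeholders on both sides are valid: "*" expands to the inner node's input names.
node.add_node("echo", inputs=[("/extracter/*", "*")], node=LLMLambdaNode(echo))

# A placeholder on only one side now raises the more specific message, e.g.
# "LLMNode::add_node() called with a placeholder external name but no placeholder internal name".
# node.add_node("bad", inputs=[("/extracter/*", "prompt")], node=LLMLambdaNode(echo))
```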
54 changes: 54 additions & 0 deletions morpheus/llm/services/llm_service.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import importlib
import logging
import typing
from abc import ABC
@@ -133,3 +134,56 @@ def get_client(self, *, model_name: str, **model_kwargs) -> LLMClient:
Additional keyword arguments to pass to the model.
"""
pass

@typing.overload
@staticmethod
def create(service_type: typing.Literal["nemo"], *service_args,
**service_kwargs) -> "morpheus.llm.services.nemo_llm_service.NeMoLLMService":
pass

@typing.overload
@staticmethod
def create(service_type: typing.Literal["openai"], *service_args,
**service_kwargs) -> "morpheus.llm.services.openai_chat_service.OpenAIChatService":
pass

@typing.overload
@staticmethod
def create(service_type: str, *service_args, **service_kwargs) -> "LLMService":
pass

@staticmethod
def create(service_type: str | typing.Literal["nemo"] | typing.Literal["openai"], *service_args, **service_kwargs):
"""
Returns a service for interacting with LLM models.

Parameters
----------
service_type : str
The type of the service to create.
service_args : typing.Any
Positional arguments to pass to the service constructor.
service_kwargs : dict[str, typing.Any]
Additional keyword arguments to pass to the service constructor.
"""
if service_type.lower() == 'openai':
llm_or_chat = "chat"
else:
llm_or_chat = "llm"

module_name = f"morpheus.llm.services.{service_type.lower()}_{llm_or_chat}_service"
module = importlib.import_module(module_name)

# Get all of the classes in the module to find the correct service class
mod_classes = {name: cls for name, cls in module.__dict__.items() if isinstance(cls, type)}

class_name_lower = f"{service_type}{llm_or_chat}Service".lower()

# Find case-insensitive match for the class name
matching_classes = [name for name in mod_classes if name.lower() == class_name_lower]

assert len(matching_classes) == 1, f"Expected to find exactly one class with name {class_name_lower} in module {module_name}, but found {matching_classes}"

# Create the class
class_ = getattr(module, matching_classes[0])

instance = class_(*service_args, **service_kwargs)

return instance
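As a usage sketch (the keyword arguments shown are illustrative, not mandated by this PR), `create()` locates the implementation purely by naming convention: module `morpheus.llm.services.{service_type}_{llm|chat}_service`, then a case-insensitive class match on `{service_type}{llm|chat}service`:

```python
from morpheus.llm.services.llm_service import LLMService

# Resolves to morpheus.llm.services.nemo_llm_service.NeMoLLMService; the
# overloads above give type checkers the concrete return type.
nemo_service = LLMService.create("nemo", retry_count=3)

# "openai" is special-cased to the chat module, so this resolves to
# morpheus.llm.services.openai_chat_service.OpenAIChatService.
openai_service = LLMService.create("openai")
```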
8 changes: 6 additions & 2 deletions morpheus/llm/services/nemo_llm_service.py
@@ -190,7 +190,7 @@ class NeMoLLMService(LLMService):
A service for interacting with NeMo LLM models; this class should be used to create a client for a specific model.
"""

-def __init__(self, *, api_key: str = None, org_id: str = None, retry_count=5) -> None:
+def __init__(self, *, api_key: str = None, org_id: str = None, base_url: str = None, retry_count=5) -> None:
"""
Creates a service for interacting with NeMo LLM models.

@@ -203,6 +203,9 @@ def __init__(self, *, api_key: str = None, org_id: str = None, retry_count=5) ->
The organization ID for the LLM service, by default None. If `None` the organization ID will be read from
the `NGC_ORG_ID` environment variable. This value is only required if the account associated with the
`api_key` is a member of multiple NGC organizations.
base_url : str, optional
The API host URL, by default None. If `None` the URL will be read from the `NGC_API_BASE` environment
variable. If neither is present an error will be raised.
retry_count : int, optional
The number of times to retry a request before raising an exception, by default 5

@@ -214,11 +217,12 @@ def __init__(self, *, api_key: str = None, org_id: str = None, retry_count=5) ->
super().__init__()
api_key = api_key if api_key is not None else os.environ.get("NGC_API_KEY", None)
org_id = org_id if org_id is not None else os.environ.get("NGC_ORG_ID", None)
base_url = base_url if base_url is not None else os.environ.get("NGC_API_BASE", None)

self._retry_count = retry_count

self._conn = nemollm.NemoLLM(
-api_host=os.environ.get("NGC_API_BASE", None),
+api_host=base_url,
# The client must configure the authentication and authorization parameters
# in accordance with the API server security policy.
# Configure Bearer authorization
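Since `base_url` now feeds `api_host` directly, callers can point the service at a non-default NGC endpoint. A minimal sketch, assuming `NGC_API_KEY` is set and the `nemollm` dependency is installed (the endpoint URL is illustrative):

```python
import os

from morpheus.llm.services.nemo_llm_service import NeMoLLMService

# An explicit base_url takes precedence; otherwise NGC_API_BASE is read from
# the environment, matching the resolution order in __init__ above.
service = NeMoLLMService(
    api_key=os.environ["NGC_API_KEY"],
    base_url="https://api.llm.ngc.nvidia.com/v1",  # illustrative endpoint
    retry_count=3,
)
```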
177 changes: 177 additions & 0 deletions morpheus/llm/services/nvfoundation_llm_service.py
@@ -0,0 +1,177 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import logging
import os
import typing

from morpheus.llm.services.llm_service import LLMClient
from morpheus.llm.services.llm_service import LLMService

logger = logging.getLogger(__name__)

IMPORT_EXCEPTION = None
IMPORT_ERROR_MESSAGE = (
"The `langchain-nvidia-ai-endpoints` package was not found. Install it and other additional dependencies by running the following command:\n"
"`conda env update --solver=libmamba -n morpheus "
"--file morpheus/conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`")

try:
from langchain_core.prompt_values import StringPromptValue
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints._common import NVEModel
except ImportError as import_exc:
IMPORT_EXCEPTION = import_exc


class NVFoundationLLMClient(LLMClient):
"""
Client for interacting with a specific model hosted on the NVIDIA AI Foundation endpoints. This class should be
constructed with the `NVFoundationLLMService.get_client` method.

Parameters
----------
parent : NVFoundationLLMService
The parent service for this client.
model_name : str
The name of the model to interact with.
model_kwargs : dict[str, typing.Any]
Additional keyword arguments to pass to the model when generating text.
"""

def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model_kwargs) -> None:
if IMPORT_EXCEPTION is not None:
raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION

super().__init__()

assert parent is not None, "Parent service cannot be None."

self._parent = parent
self._model_name = model_name
self._model_kwargs = model_kwargs
self._prompt_key = "prompt"

self._client = ChatNVIDIA(client=self._parent._nve_client, model=model_name, **model_kwargs)

def get_input_names(self) -> list[str]:
return [self._prompt_key]

def generate(self, **input_dict) -> str:
"""
Issue a request to generate a response based on a given prompt.

Parameters
----------
input_dict : dict
Input containing prompt data.
"""
return self.generate_batch({self._prompt_key: [input_dict[self._prompt_key]]})[0]

async def generate_async(self, **input_dict) -> str:
"""
Issue an asynchronous request to generate a response based on a given prompt.

Parameters
----------
input_dict : dict
Input containing prompt data.
"""

inputs = {self._prompt_key: [input_dict[self._prompt_key]]}

input_dict.pop(self._prompt_key)

return (await self.generate_batch_async(inputs=inputs, **input_dict))[0]

def generate_batch(self, inputs: dict[str, list], **kwargs) -> list[str]:
"""
Issue a request to generate a list of responses based on a list of prompts.

Parameters
----------
inputs : dict
Inputs containing prompt data.
"""
prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]]

final_kwargs = {**self._model_kwargs, **kwargs}

responses = self._client.generate_prompt(prompts=prompts, **final_kwargs)  # type: ignore

return [g[0].text for g in responses.generations]

async def generate_batch_async(self, inputs: dict[str, list], **kwargs) -> list[str]:
"""
Issue an asynchronous request to generate a list of responses based on a list of prompts.

Parameters
----------
inputs : dict
Inputs containing prompt data.
"""

prompts = [StringPromptValue(text=p) for p in inputs[self._prompt_key]]

final_kwargs = {**self._model_kwargs, **kwargs}

responses = await self._client.agenerate_prompt(prompts=prompts, **final_kwargs) # type: ignore

return [g[0].text for g in responses.generations]


class NVFoundationLLMService(LLMService):
"""
A service for interacting with models hosted on the NVIDIA AI Foundation endpoints; this class should be used to
create a client for a specific model.

Parameters
----------
api_key : str, optional
The API key for the LLM service, by default None. If `None` the API key will be read from the `NGC_API_KEY`
environment variable. If neither are present an error will be raised.
base_url : str, optional
The API host URL, by default None. If `None` the URL will be read from the `NVIDIA_API_BASE` environment
variable. If neither is present, `https://api.nvcf.nvidia.com/v2` will be used.
"""

def __init__(self, *, api_key: str = None, base_url: str = None) -> None:
if IMPORT_EXCEPTION is not None:
raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION

super().__init__()

self._api_key = api_key
if base_url is None:
self._base_url = os.getenv('NVIDIA_API_BASE', 'https://api.nvcf.nvidia.com/v2')
else:
self._base_url = base_url

self._nve_client = NVEModel(
nvidia_api_key=self._api_key,
fetch_url_format=f"{self._base_url}/nvcf/pexec/status/",
call_invoke_base=f"{self._base_url}/nvcf/pexec/functions",
func_list_format=f"{self._base_url}/nvcf/functions",
) # type: ignore

def get_client(self, *, model_name: str, **model_kwargs) -> NVFoundationLLMClient:
"""
Returns a client for interacting with a specific model. This method is the preferred way to create a client.

Parameters
----------
model_name : str
The name of the model to create a client for.
model_kwargs : dict[str, typing.Any]
Additional keyword arguments to pass to the model when generating text.
"""

return NVFoundationLLMClient(self, model_name=model_name, **model_kwargs)
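Putting the new service together, a minimal end-to-end sketch (assuming `langchain-nvidia-ai-endpoints` is installed and an NGC API key is configured; the model name and sampling argument are illustrative):

```python
from morpheus.llm.services.nvfoundation_llm_service import NVFoundationLLMService

service = NVFoundationLLMService()  # falls back to NVIDIA_API_BASE, then the nvcf default
client = service.get_client(model_name="mixtral_8x7b", temperature=0.2)

# generate_batch maps a list of prompts onto a list of completions via ChatNVIDIA.
completions = client.generate_batch({"prompt": ["What is Morpheus?", "What is NeMo?"]})
```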
15 changes: 12 additions & 3 deletions morpheus/llm/services/openai_chat_service.py
@@ -113,8 +113,8 @@ def __init__(self,
self._model_kwargs = copy.deepcopy(model_kwargs)

# Create the client objects for both sync and async
-self._client = openai.OpenAI(max_retries=max_retries)
-self._client_async = openai.AsyncOpenAI(max_retries=max_retries)
+self._client = openai.OpenAI(api_key=parent._api_key, base_url=parent._base_url, max_retries=max_retries)
+self._client_async = openai.AsyncOpenAI(api_key=parent._api_key, base_url=parent._base_url, max_retries=max_retries)

def get_input_names(self) -> list[str]:
input_names = [self._prompt_key]
@@ -316,12 +316,18 @@ class OpenAIChatService(LLMService):
A service for interacting with OpenAI Chat models; this class should be used to create clients.
"""

-def __init__(self, *, default_model_kwargs: dict = None) -> None:
+def __init__(self, *, api_key: str = None, base_url: str = None, default_model_kwargs: dict = None) -> None:
"""
Creates a service for interacting with OpenAI Chat models, this class should be used to create clients.

Parameters
----------
api_key : str, optional
The API key for the LLM service, by default None. If `None` the API key will be read from the
`OPENAI_API_KEY` environment variable. If neither are present an error will be raised.
base_url : str, optional
The API host URL, by default None. If `None` the URL will be read from the `OPENAI_API_BASE` environment
variable. If neither is present the OpenAI default will be used.
default_model_kwargs : dict, optional
Default arguments to use when creating a client via the `get_client` function. Any argument specified here
will automatically be used when calling `get_client`. Arguments specified in the `get_client` function will
@@ -338,6 +344,9 @@ def __init__(self, *, default_model_kwargs: dict = None) -> None:

super().__init__()

self._api_key = api_key
self._base_url = base_url

self._default_model_kwargs = default_model_kwargs or {}

self._logger = logging.getLogger(f"{__package__}.{OpenAIChatService.__name__}")
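With `api_key` and `base_url` plumbed through to both clients, the service can target any OpenAI-compatible endpoint. A hedged sketch (the key, URL, and model name are placeholders, not values from this PR):

```python
from morpheus.llm.services.openai_chat_service import OpenAIChatService

# api_key/base_url flow through to both openai.OpenAI and openai.AsyncOpenAI.
service = OpenAIChatService(
    api_key="sk-...",  # placeholder; normally read from OPENAI_API_KEY
    base_url="http://localhost:8000/v1",  # e.g. a local OpenAI-compatible server
    default_model_kwargs={"temperature": 0.0},
)
client = service.get_client(model_name="gpt-3.5-turbo")
```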
13 changes: 13 additions & 0 deletions morpheus/llm/services/utils/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.