diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index f64e0d3b32..dfac10e129 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -27,7 +27,6 @@ services: volumes: - ${HOST_MORPHEUS_ROOT}/models:/models - ${HOST_MORPHEUS_ROOT}/examples/abp_pcap_detection/abp-pcap-xgb:/models/triton-model-repo/abp-pcap-xgb - - ${HOST_MORPHEUS_ROOT}/examples/ransomware_detection/models/ransomw-model-short-rf:/models/triton-model-repo/ransomw-model-short-rf zookeeper: image: bitnami/zookeeper:latest diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 1af5a22f0c..6651dcdb15 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -113,3 +113,16 @@ sed_runner "s/${CURRENT_SHORT_TAG}/${NEXT_SHORT_TAG}/g" docs/source/getting_star # models/model-cards sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" models/model-cards/*.md sed_runner "s|tree/branch-${CURRENT_SHORT_TAG}|tree/branch-${NEXT_SHORT_TAG}|g" models/model-cards/*.md + +# Update the version of the Morpheus model container +# We need to update several files, however we need to avoid symlinks as well as the build and .cache directories +DOCS_MD_FILES=$(find -P ./docs/source/ -type f -iname "*.md") +EXAMPLES_MD_FILES=$(find -P ./examples/ -type f -iname "*.md") +sed_runner "s|morpheus-tritonserver-models:${CURRENT_SHORT_TAG}|morpheus-tritonserver-models:${NEXT_SHORT_TAG}|g" \ + ${DOCS_MD_FILES} \ + ${EXAMPLES_MD_FILES} \ + .devcontainer/docker-compose.yml \ + examples/sid_visualization/docker-compose.yml \ + models/triton-model-repo/README.md \ + scripts/validation/val-globals.sh \ + tests/benchmarks/README.md diff --git a/docker/Dockerfile b/docker/Dockerfile index 875d935c7d..6cdfd57f1b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -294,7 +294,7 @@ COPY "${MORPHEUS_ROOT_HOST}/conda/environments/*.yaml" "./conda/environments/" COPY "${MORPHEUS_ROOT_HOST}/docker" "./docker" COPY --from=build_docs "/workspace/build/docs/html" "./docs" COPY "${MORPHEUS_ROOT_HOST}/examples" "./examples" -COPY "${MORPHEUS_ROOT_HOST}/models" "./models" +COPY "${MORPHEUS_ROOT_HOST}/models/datasets" "./models/datasets" COPY "${MORPHEUS_ROOT_HOST}/scripts" "./scripts" COPY "${MORPHEUS_ROOT_HOST}/*.md" "./" COPY "${MORPHEUS_ROOT_HOST}/LICENSE" "./" diff --git a/docker/build_container_release.sh b/docker/build_container_release.sh index 01d505c48a..fc6a22c647 100755 --- a/docker/build_container_release.sh +++ b/docker/build_container_release.sh @@ -27,7 +27,7 @@ export DOCKER_TARGET=${DOCKER_TARGET:-"runtime"} popd &> /dev/null # Fetch data -"${SCRIPT_DIR}/../scripts/fetch_data.py" fetch docs examples models +"${SCRIPT_DIR}/../scripts/fetch_data.py" fetch docs examples datasets # Call the general build script ${SCRIPT_DIR}/build_container.sh diff --git a/docker/run_container_release.sh b/docker/run_container_release.sh index 7a60d75faf..53851abae2 100755 --- a/docker/run_container_release.sh +++ b/docker/run_container_release.sh @@ -37,7 +37,7 @@ DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""} popd &> /dev/null -DOCKER_ARGS="--runtime=nvidia --env WORKSPACE_VOLUME=${PWD} -v $PWD/models:/workspace/models --net=host --gpus=all --cap-add=sys_nice ${DOCKER_EXTRA_ARGS}" +DOCKER_ARGS="--runtime=nvidia --env WORKSPACE_VOLUME=${PWD} --net=host --gpus=all --cap-add=sys_nice ${DOCKER_EXTRA_ARGS}" if [[ -n "${SSH_AUTH_SOCK}" ]]; then echo -e "${b}Setting up ssh-agent auth socket${x}" diff --git a/docs/source/basics/building_a_pipeline.md 
b/docs/source/basics/building_a_pipeline.md index 395cc12f24..9aa1aa8645 100644 --- a/docs/source/basics/building_a_pipeline.md +++ b/docs/source/basics/building_a_pipeline.md @@ -211,9 +211,9 @@ Pipeline visualization saved to .tmp/multi_monitor_throughput.png This example shows an NLP Pipeline which uses several stages available in Morpheus. This example utilizes the Triton Inference Server to perform inference, and writes the output to a Kafka topic named `inference_output`. Both of which need to be started prior to launching Morpheus. #### Launching Triton -From the Morpheus repo root directory, run the following to launch Triton and load the `sid-minibert` model: +Run the following to launch Triton and load the `sid-minibert` model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx ``` #### Launching Kafka diff --git a/docs/source/developer_guide/contributing.md b/docs/source/developer_guide/contributing.md index 1c9299f1a4..2ccdb1a2d1 100644 --- a/docs/source/developer_guide/contributing.md +++ b/docs/source/developer_guide/contributing.md @@ -129,7 +129,7 @@ This workflow utilizes a Docker container to set up most dependencies ensuring a ``` 1. The container tag follows the same rules as `build_container_dev.sh` and will default to the current `YYMMDD`. Specify the desired tag with `DOCKER_IMAGE_TAG`. i.e. `DOCKER_IMAGE_TAG=my_tag ./docker/run_container_dev.sh` 2. This will automatically mount the current working directory to `/workspace`. - 3. Some of the validation tests require launching a Triton Docker container within the Morpheus container. To enable this you will need to grant the Morpheus container access to your host OS's Docker socket file with: + 3. Some of the validation tests require launching the Morpheus models Docker container within the Morpheus container. To enable this you will need to grant the Morpheus container access to your host OS's Docker socket file with: ```bash DOCKER_EXTRA_ARGS="-v /var/run/docker.sock:/var/run/docker.sock" ./docker/run_container_dev.sh ``` @@ -235,6 +235,13 @@ git submodule update --init --recursive ``` At this point, Morpheus can be fully used. Any changes to Python code will not require a rebuild. Changes to C++ code will require calling `./scripts/compile.sh`. Installing Morpheus is only required once per virtual environment. +### Build the Morpheus Models Container + +From the root of the Morpheus repository, run the following command: +```bash +models/docker/build_container.sh +``` + ### Quick Launch Kafka Cluster Launching a full production Kafka cluster is outside the scope of this project; however, if a quick cluster is needed for testing or development, one can be quickly launched via Docker Compose. The following commands outline that process.
Refer to [this](https://medium.com/big-data-engineering/hello-kafka-world-the-complete-guide-to-kafka-with-docker-and-python-f788e2588cfc) guide for more in-depth information: diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md index 7dc24ae993..16b2b30f3d 100644 --- a/docs/source/developer_guide/guides/2_real_world_phishing.md +++ b/docs/source/developer_guide/guides/2_real_world_phishing.md @@ -221,22 +221,21 @@ In the above the `needed_columns` were provided to as an argument to the `stage` ## Predicting Fraudulent Emails with Accelerated Machine Learning -Now we'll use the `RecipientFeaturesStage` that we just made in a real-world pipeline to detect fraudulent emails. The pipeline we will be building makes use of the `TritonInferenceStage` which is a pre-defined Morpheus stage designed to support the execution of Natural Language Processing (NLP) models via NVIDIA's [Triton Inference Server](https://developer.nvidia.com/nvidia-triton-inference-server). NVIDIA Triton Inference Server allows for GPU accelerated ML/DL and seamless co-location and execution of a wide variety of model frameworks. For our application, we will be using the `phishing-bert-onnx` model, which is included with Morpheus in the `models/triton-model-repo/` directory. +Now we'll use the `RecipientFeaturesStage` that we just made in a real-world pipeline to detect fraudulent emails. The pipeline we will be building makes use of the `TritonInferenceStage` which is a pre-defined Morpheus stage designed to support the execution of Natural Language Processing (NLP) models via NVIDIA's [Triton Inference Server](https://developer.nvidia.com/nvidia-triton-inference-server). NVIDIA Triton Inference Server allows for GPU accelerated ML/DL and seamless co-location and execution of a wide variety of model frameworks. For our application, we will be using the `phishing-bert-onnx` model, which is included with the Morpheus models Docker container as well as in the `models/triton-model-repo/phishing-bert-onnx` directory. It's important to note here that Triton is a service that is external to the Morpheus pipeline and often will not reside on the same machine(s) as the rest of the pipeline. The `TritonInferenceStage` will use HTTP and [gRPC](https://grpc.io/) network protocols to allow us to interact with the machine learning models that are hosted by the Triton server. ### Launching Triton -Triton will need to be running while we execute our pipeline. For simplicity, we will launch it locally inside of a Docker container. +Triton will need to be running while we execute our pipeline. For simplicity, we will be using the Morpheus models container, which includes both Triton and the Morpheus models. > **Note**: This step assumes you have both [Docker](https://docs.docker.com/engine/install/) and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installation-guide) installed.
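If the models container has not already been pulled, it can be fetched from NGC first (this mirrors the pull command used elsewhere in these documents; the `24.10` tag is the release version referenced throughout):

```bash
docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10
```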
-From the root of the Morpheus project we will launch a Triton Docker container with the `models` directory mounted into the container: +We will launch a Triton Docker container with: ```shell docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ - -v $PWD/models:/models \ - nvcr.io/nvidia/tritonserver:23.06-py3 \ + nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --log-info=true \ @@ -381,7 +380,7 @@ From this information, we note that the expected dimensions of the model inputs ### Defining our Pipeline For this pipeline we will have several configuration parameters such as the paths to the input and output files, we will be using the (click)[https://click.palletsprojects.com/] library to expose and parse these parameters as command line arguments. We will also expose the choice of using the class or function based stage implementation via the `--use_stage_function` command-line flag. -> **Note**: For simplicity, we assume that the `MORPHEUS_ROOT` environment variable is set to the root of the Morpheus project repository. +> **Note**: For simplicity, we assume that the `MORPHEUS_ROOT` environment variable is set to the root of the Morpheus project repository. To start, we will need to instantiate and set a few attributes of the `Config` class. This object is used for configuration options that are global to the pipeline as a whole. We will provide this object to each stage along with stage-specific configuration parameters. @@ -402,7 +401,7 @@ The `feature_length` property needs to match the dimensions of the model inputs, Ground truth classification labels are read from the `morpheus/data/labels_phishing.txt` file included in Morpheus. -Now that our config object is populated, we move on to the pipeline itself. We will be using the same input file from the previous example. +Now that our config object is populated, we move on to the pipeline itself. We will be using the same input file from the previous example. Next, we will add our custom recipient features stage to the pipeline. We imported both implementations of the stage, allowing us to add the appropriate one based on the `use_stage_function` value provided by the command-line. diff --git a/docs/source/examples.md b/docs/source/examples.md index b1f7d6b9d5..8596e7de5d 100644 --- a/docs/source/examples.md +++ b/docs/source/examples.md @@ -33,9 +33,14 @@ Ensure the environment is set up by following [Getting Started with Morpheus](./ ## Environments -Morpheus supports multiple environments, each environment is intended to support a given use-case. Each example documents which environments it is able to run in. With the exception of the Morpheus Release Container, the examples require fetching the model and example datasets via the `fetch_data.sh` script: +Morpheus supports multiple environments, each environment is intended to support a given use-case. Each example documents which environments it is able to run in. 
With the exception of the Morpheus Release Container, the examples require fetching both the `datasets` and `examples` datasets via the `fetch_data.py` script: ```bash -./scripts/fetch_data.py fetch examples models +./scripts/fetch_data.py fetch examples datasets +``` + +In addition to this, many of the examples utilize the Morpheus Triton Models container which can be obtained by running the following command: +```bash +docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 ``` The following are the supported environments: diff --git a/docs/source/getting_started.md b/docs/source/getting_started.md index 79ef4a5ccb..7abe40bd1f 100644 --- a/docs/source/getting_started.md +++ b/docs/source/getting_started.md @@ -17,10 +17,14 @@ limitations under the License. # Getting Started with Morpheus -There are two ways to get started with Morpheus: +There are three ways to get started with Morpheus: +- [Using pre-built Docker containers](#using-pre-built-docker-containers) - [Building the Morpheus Docker container](#building-the-morpheus-container) - [Building Morpheus from source](./developer_guide/contributing.md#building-from-source) +The [pre-built Docker containers](#using-pre-built-docker-containers) are the easiest way to get started with the latest release of Morpheus. Released versions of Morpheus containers can be found on [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/collections/morpheus_). + +More advanced users, or those who are interested in using the latest pre-release features, will need to [build the Morpheus container](#building-the-morpheus-container) or [build from source](./developer_guide/contributing.md#building-from-source). ## Requirements - Volta architecture GPU or better @@ -33,6 +37,47 @@ There are two ways to get started with Morpheus: > > The Morpheus documentation and examples assume that the [Manage Docker as a non-root user](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user) post install step has been performed allowing Docker commands to be executed by a non-root user. This is not strictly necessary so long as the current user has `sudo` privileges to execute Docker commands. +## Using pre-built Docker containers +### Pull the Morpheus Image +1. Go to [https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/containers/morpheus/tags](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/containers/morpheus/tags) +1. Choose a version +1. Download the selected version, for example for `24.10`: + ```bash + docker pull nvcr.io/nvidia/morpheus/morpheus:24.10-runtime + ``` +1. Optionally, many of the examples require NVIDIA Triton Inference Server to be running with the included models. To download the Morpheus Triton Server Models container (ensure that the version number matches that of the Morpheus container you downloaded in the previous step): + ```bash + docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 + ``` + +> **Note about Morpheus versions:** +> +> Morpheus uses Calendar Versioning ([CalVer](https://calver.org/)). For each Morpheus release there will be an image tagged in the form of `YY.MM-runtime`; this tag will always refer to the latest point release for that version. In addition to this, there will also be at least one point release version tagged in the form of `vYY.MM.00-runtime`; this will be the initial point release for that version (ex. `v24.10.00-runtime`). In the event of a major bug, we may release additional point releases (ex.
`v24.10.01-runtime`, `v24.10.02-runtime` etc...), and the `YY.MM-runtime` tag will be updated to reference that point release. +> +> Users who want to ensure they are running with the latest bug fixes should use a release image tag (`YY.MM-runtime`). Users who need to deploy a specific version into production should use a point release image tag (`vYY.MM.00-runtime`). + +### Starting the Morpheus Container +1. Ensure that [The NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) is installed. +1. Start the container downloaded from the previous section: +```bash +docker run --rm -ti --runtime=nvidia --gpus=all --net=host -v /var/run/docker.sock:/var/run/docker.sock nvcr.io/nvidia/morpheus/morpheus:24.10-runtime bash +``` + +Note about some of the flags above: +| Flag | Description | +| ---- | ----------- | +| `--runtime=nvidia` | Choose the NVIDIA docker runtime, this enables access to the GPU inside the container. This flag isn't needed if the `nvidia` runtime is already set as the default runtime for Docker. | +| `--gpus=all` | Specify which GPUs the container has access to. Alternately, a specific GPU could be chosen with `--gpus=` | +| `--net=host` | Most of the Morpheus pipelines utilize [NVIDIA Triton Inference Server](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver), which will be running in another container. For simplicity we will give the container access to the host system's network, production deployments may opt for an explicit network configuration. | +| `-v /var/run/docker.sock:/var/run/docker.sock` | Enables access to the Docker socket file from within the running container, this allows launching other Docker containers from within the Morpheus container. This flag is required for launching Triton with access to the included Morpheus models, users with their own models can omit this. | + +Once launched, users wishing to launch Triton using the included Morpheus models will need to install the Docker tools in the Morpheus container by running: +```bash +./external/utilities/docker/install_docker.sh +``` + +Skip ahead to the [Acquiring the Morpheus Models Container](#acquiring-the-morpheus-models-container) section. + ## Building the Morpheus Container ### Clone the Repository @@ -57,6 +102,7 @@ scripts/fetch_data.py fetch [...] At time of writing the defined datasets are: * all - Metaset includes all others +* datasets - Input files needed for many of the examples * docs - Graphics needed for documentation * examples - Data needed by scripts in the `examples` subdir * models - Morpheus models (largest dataset) @@ -100,14 +146,24 @@ The `./docker/run_container_release.sh` script accepts the same `DOCKER_IMAGE_NA DOCKER_IMAGE_TAG="v24.10.00-runtime" ./docker/run_container_release.sh ``` -## Launching Triton Server +## Acquiring the Morpheus Models Container -Many of the validation tests and example workflows require a Triton server to function. In a new terminal, from the root of the Morpheus repo, use the following command to launch a Docker container for Triton loading all of the included pre-trained models: +Many of the validation tests and example workflows require a Triton server to function. For simplicity Morpheus provides a pre-built models container which contains both Triton and the Morpheus models. 
Users using a release version of Morpheus can download the corresponding Triton models container from NGC with the following command: +```bash +docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 +``` +Users working with an unreleased development version of Morpheus can build the Triton models container from the Morpheus repository. To build the Triton models container, from the root of the Morpheus repository run the following command: +```bash +models/docker/build_container.sh +``` + +## Launching Triton Server + +In a new terminal use the following command to launch a Docker container for Triton loading all of the included pre-trained models: ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ - -v $PWD/models:/models \ - nvcr.io/nvidia/tritonserver:23.06-py3 \ + nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --log-info=true \ @@ -119,6 +175,19 @@ This will launch Triton using the default network ports (8000 for HTTP, 8001 for Note: The above command is useful for testing out Morpheus, however it does load several models into GPU memory, which at time of writing consumes roughly 2GB of GPU memory. Production users should consider only loading the specific model(s) they plan on using with the `--model-control-mode=explicit` and `--load-model` flags. For example to launch Triton only loading the `abp-nvsmi-xgb` model: ```bash +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ + nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 \ + tritonserver --model-repository=/models/triton-model-repo \ + --exit-on-error=false \ + --log-info=true \ + --strict-readiness=false \ + --disable-auto-complete-config \ + --model-control-mode=explicit \ + --load-model abp-nvsmi-xgb +``` + +Alternately, for users who have checked out the Morpheus git repository, launching the Triton server container directly mounting the models from the repository is an option. This approach is most useful for users training their own models. From the root of the Morpheus repo, use the following command to launch a Docker container for Triton loading all of the included pre-trained models: +```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ -v $PWD/models:/models \ nvcr.io/nvidia/tritonserver:23.06-py3 \ diff --git a/examples/README.md b/examples/README.md index 983adc08db..25d3a9b687 100644 --- a/examples/README.md +++ b/examples/README.md @@ -32,9 +32,9 @@ limitations under the License. * [Retrieval Augmented Generation (RAG)](./llm/rag/README.md) ## Environments -Morpheus supports multiple environments, each environment is intended to support a given use-case. Each example documents which environments it is able to run in. With the exception of the Morpheus Release Container, the examples require fetching the model and example datasets via the `fetch_data.sh` script: +Morpheus supports multiple environments, each environment is intended to support a given use-case. Each example documents which environments it is able to run in. 
With the exception of the Morpheus Release Container, the examples require fetching both the `datasets` and `examples` datasets via the `fetch_data.py` script: ```bash -./scripts/fetch_data.py fetch examples models +./scripts/fetch_data.py fetch examples datasets ``` The following are the supported environments: diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index 1927ffc9cb..cbaf809086 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -86,12 +86,12 @@ This example utilizes the Triton Inference Server to perform inference. Pull the Docker image for Triton: ```bash -docker pull nvcr.io/nvidia/tritonserver:23.06-py3 +docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 ``` -From the Morpheus repo root directory, run the following to launch Triton and load the `abp-nvsmi-xgb` XGBoost model: +Run the following to launch Triton and load the `abp-nvsmi-xgb` XGBoost model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-nvsmi-xgb +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-nvsmi-xgb ``` This will launch Triton and only load the `abp-nvsmi-xgb` model. This model has been configured with a max batch size of 32768, and to use dynamic batching for increased performance. diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md index 8ff40ac2e9..3220da33c9 100644 --- a/examples/abp_pcap_detection/README.md +++ b/examples/abp_pcap_detection/README.md @@ -30,13 +30,13 @@ To run this example, an instance of Triton Inference Server and a sample dataset ### Triton Inference Server ```bash -docker pull nvcr.io/nvidia/tritonserver:23.06-py3 +docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 ``` ##### Deploy Triton Inference Server -From the root of the Morpheus repo, run the following to launch Triton and load the `abp-pcap-xgb` model: +Run the following to launch Triton and load the `abp-pcap-xgb` model: ```bash -docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/examples/abp_pcap_detection/abp-pcap-xgb:/models/abp-pcap-xgb --name tritonserver nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models --exit-on-error=false +docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 --name tritonserver nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-pcap-xgb ``` ##### Verify Model Deployment diff --git a/examples/doca/vdb_realtime/README.md b/examples/doca/vdb_realtime/README.md index e4b02e6042..8072c0c0de 100644 --- a/examples/doca/vdb_realtime/README.md +++ b/examples/doca/vdb_realtime/README.md @@ -35,6 +35,12 @@ Start Milvus docker compose up -d ``` +## Fetch the examples data + +```bash +./scripts/fetch_data.py fetch examples +``` + ## Launch Triton Inference Server To serve the embedding model, we will use Triton: @@ -42,10 +48,8 @@ To serve the embedding model, we will use Triton: ```bash cd ${MORPHEUS_ROOT} -# Fetch all models
-./scripts/fetch_data.py fetch models # Launch Triton -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:24.01-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model all-MiniLM-L6-v2 +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model all-MiniLM-L6-v2 ``` ## Populate the Milvus database diff --git a/examples/llm/rag/README.md b/examples/llm/rag/README.md index 1fb5d451f7..ee4adcc198 100644 --- a/examples/llm/rag/README.md +++ b/examples/llm/rag/README.md @@ -100,10 +100,10 @@ Before running the pipeline, we need obtain service API keys for the following s ### Ensure that LFS files are downloaded -To retrieve models from LFS run the following: +To retrieve datasets from LFS run the following: ```bash -./scripts/fetch_data.py fetch models +./scripts/fetch_data.py fetch datasets ``` ### Obtain an OpenAI API or NGC API Key diff --git a/examples/llm/vdb_upload/README.md b/examples/llm/vdb_upload/README.md index de6c18a81d..4f9b00a484 100644 --- a/examples/llm/vdb_upload/README.md +++ b/examples/llm/vdb_upload/README.md @@ -125,10 +125,10 @@ Before running the pipeline, we need to ensure that the following services are r #### Ensure LFS files are downloaded -To retrieve models from LFS run the following: +To retrieve datasets from LFS run the following: ```bash -./scripts/fetch_data.py fetch models +./scripts/fetch_data.py fetch datasets ``` #### Milvus Service @@ -140,12 +140,12 @@ To retrieve models from LFS run the following: - Pull the Docker image for Triton: ```bash - docker pull nvcr.io/nvidia/tritonserver:23.06-py3 + docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 ``` -- From the Morpheus repo root directory, run the following to launch Triton and load the `all-MiniLM-L6-v2` model: +- Run the following to launch Triton and load the `all-MiniLM-L6-v2` model: ```bash - docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model all-MiniLM-L6-v2 + docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model all-MiniLM-L6-v2 ``` This will launch Triton and only load the `all-MiniLM-L6-v2` model. 
Once Triton has loaded the model, the following @@ -279,7 +279,7 @@ using `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` as an exampl - Reload the docker container, specifying that we also need to load paraphrase-multilingual-mpnet-base-v2 ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ - -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver \ + nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver \ --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model \ all-MiniLM-L6-v2 --load-model sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ``` diff --git a/examples/log_parsing/README.md b/examples/log_parsing/README.md index eff8d62538..4d798a66cb 100644 --- a/examples/log_parsing/README.md +++ b/examples/log_parsing/README.md @@ -29,19 +29,19 @@ Example Morpheus pipeline using Triton Inference server and Morpheus. ### Set up Triton Inference Server ##### Pull Triton Inference Server Docker Image -Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) suitable for your environment. +Pull the Morpheus Triton models Docker image from NGC. Example: ```bash -docker pull nvcr.io/nvidia/tritonserver:23.06-py3 +docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 ``` ##### Start Triton Inference Server Container From the Morpheus repo root directory, run the following to launch Triton and load the `log-parsing-onnx` model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model log-parsing-onnx +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model log-parsing-onnx ``` ##### Verify Model Deployment diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index 19d38e19c0..ab69546d22 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -85,11 +85,9 @@ This example utilizes the Triton Inference Server to perform inference. The neur From the Morpheus repo root directory, run the following to launch Triton and load the `sid-minibert` model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx ``` -Where `23.06-py3` can be replaced with the current year and month of the Triton version to use. For example, to use May 2021, specify `nvcr.io/nvidia/tritonserver:21.05-py3`. Ensure that the version of TensorRT that is used in Triton matches the version of TensorRT elsewhere (refer to [NGC Deep Learning Frameworks Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)). 
- This will launch Triton and only load the `sid-minibert-onnx` model. This model has been configured with a max batch size of 32, and to use dynamic batching for increased performance. Once Triton has loaded the model, the following should be displayed: diff --git a/examples/ransomware_detection/README.md b/examples/ransomware_detection/README.md index 6b9e19f1ac..84c48147a4 100644 --- a/examples/ransomware_detection/README.md +++ b/examples/ransomware_detection/README.md @@ -35,7 +35,7 @@ Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tri Example: ```bash -docker pull nvcr.io/nvidia/tritonserver:23.06-py3 +docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 ``` ##### Setup Env Variable ```bash @@ -47,7 +47,7 @@ From the Morpheus repo root directory, run the following to launch Triton and lo ```bash # Run Triton in explicit mode docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ - -v $PWD/examples/ransomware_detection/models:/models/triton-model-repo nvcr.io/nvidia/tritonserver:23.06-py3 \ + nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ @@ -84,7 +84,7 @@ Input features for a short model can be taken from every three snapshots sequenc The configuration options for this example can be queried with: ```bash -python run.py --help +python examples/ransomware_detection/run.py --help ``` ``` diff --git a/examples/root_cause_analysis/README.md b/examples/root_cause_analysis/README.md index 84f3d47b2a..47a4cd1dc1 100644 --- a/examples/root_cause_analysis/README.md +++ b/examples/root_cause_analysis/README.md @@ -54,11 +54,9 @@ This example utilizes the Triton Inference Server to perform inference. The bina From the Morpheus repo root directory, run the following to launch Triton and load the `root-cause-binary-onnx` model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model root-cause-binary-onnx +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model root-cause-binary-onnx ``` -Where `23.06-py3` can be replaced with the current year and month of the Triton version to use. For example, to use May 2021, specify `nvcr.io/nvidia/tritonserver:21.05-py3`. Ensure that the version of TensorRT that is used in Triton matches the version of TensorRT elsewhere (refer to [NGC Deep Learning Frameworks Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)). - This will launch Triton and only load the model required by our example pipeline. The model has been configured with a max batch size of 32, and to use dynamic batching for increased performance. 
Once Triton has loaded the model, the following should be displayed: diff --git a/examples/sid_visualization/README.md b/examples/sid_visualization/README.md index ff641fdf1d..faf2c666c3 100644 --- a/examples/sid_visualization/README.md +++ b/examples/sid_visualization/README.md @@ -47,11 +47,6 @@ Save the Morpheus repo directory: export MORPHEUS_ROOT=$(git rev-parse --show-toplevel) ``` -Ensure SID model is downloaded for deployment to Triton: -```bash -./scripts/fetch_data.py fetch models -``` - Change to the example directory: ```bash cd ${MORPHEUS_ROOT}/examples/sid_visualization diff --git a/examples/sid_visualization/docker-compose.yml b/examples/sid_visualization/docker-compose.yml index 3bd48c6238..9f42360019 100644 --- a/examples/sid_visualization/docker-compose.yml +++ b/examples/sid_visualization/docker-compose.yml @@ -19,11 +19,11 @@ x-with-gpus: &with_gpus reservations: devices: - capabilities: - - gpu + - gpu services: triton: - image: nvcr.io/nvidia/tritonserver:23.06-py3 + image: nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 <<: *with_gpus command: "tritonserver --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx --model-repository=/models/triton-model-repo" environment: @@ -33,8 +33,6 @@ services: - "8001" - "8002" runtime: nvidia - volumes: - - "${MORPHEUS_HOME:-../..}/models:/models" gui: image: sid-viz:latest diff --git a/models/docker/Dockerfile b/models/docker/Dockerfile new file mode 100644 index 0000000000..b2df3fc1b1 --- /dev/null +++ b/models/docker/Dockerfile @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ARG FROM_IMAGE="nvcr.io/nvidia/tritonserver" +ARG FROM_IMAGE_TAG="23.06-py3" +FROM ${FROM_IMAGE}:${FROM_IMAGE_TAG} AS base + +WORKDIR / + +# Copy the model repository +COPY "${MORPHEUS_ROOT_HOST}/models" "./models" + +# Copy the example models that live in the examples dir +COPY "${MORPHEUS_ROOT_HOST}/examples/abp_pcap_detection/abp-pcap-xgb" "./models/triton-model-repo/abp-pcap-xgb" diff --git a/models/docker/README.md b/models/docker/README.md new file mode 100644 index 0000000000..aa057b60b1 --- /dev/null +++ b/models/docker/README.md @@ -0,0 +1,38 @@ + + +# Morpheus Triton Server Models Container + +The Morpheus Triton Server Models Container builds upon the [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) container by adding the Morpheus pre-trained models. + +## Building the Container +To build the container with the default arguments, run the following command from the root of the Morpheus repository: +```bash +./models/docker/build_container.sh +``` + +This will build a container tagged as `nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:`. 
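As a quick, hedged sanity check after the build, the resulting image can be listed and launched the same way the NGC-hosted image is launched elsewhere in these documents (the `24.10` tag is assumed here; `build_container.sh` derives the actual tag from the current Morpheus version):

```bash
# List the locally built image; the tag defaults to the current Morpheus version
docker images nvcr.io/nvidia/morpheus/morpheus-tritonserver-models

# Launch Triton from the built image, loading a single model
docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \
  nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 \
  tritonserver --model-repository=/models/triton-model-repo \
  --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx
```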
+ +### Environment Variable Arguments +The `build_container.sh` script accepts the following environment variables to customize the build: +- `MORPHEUS_ROOT`: File path to the root of the Morpheus repository, if undefined the script will infer the value based on the script's location. +- `MORPHEUS_ROOT_HOST`: Relative path from the script working directory to the root of the Morpheus repository on the host. This should not need to be set so long as `MORPHEUS_ROOT` is set correctly. +- `FROM_IMAGE`: The base Triton Inference Server container image to use, defaults to `nvcr.io/nvidia/tritonserver`. +- `FROM_IMAGE_TAG`: The tag of the base Triton Inference Server container image to use. +- `DOCKER_IMAGE_NAME`: The name of the resulting container image, defaults to `nvcr.io/nvidia/morpheus/morpheus-tritonserver-models`. +- `DOCKER_IMAGE_TAG`: The tag of the resulting container image, defaults to the current Morpheus version. +- `DOCKER_EXTRA_ARGS`: Additional arguments to pass to the `docker build` command. diff --git a/models/docker/build_container.sh b/models/docker/build_container.sh new file mode 100755 index 0000000000..85dfe40ddb --- /dev/null +++ b/models/docker/build_container.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
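# Usage sketch (illustrative values, not defaults from this script): the environment
# variables documented in models/docker/README.md can override the settings below, for example:
#   FROM_IMAGE_TAG="23.06-py3" DOCKER_IMAGE_TAG="dev" ./models/docker/build_container.sh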
+ +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +# Get the path to MORPHEUS_ROOT without altering the docker context (in case we are in a submodule) +pushd ${SCRIPT_DIR} &> /dev/null +export MORPHEUS_ROOT=${MORPHEUS_ROOT:-"$(git rev-parse --show-toplevel)"} +popd &> /dev/null + +# Determine the relative path from $PWD to $MORPHEUS_ROOT +MORPHEUS_ROOT_HOST=${MORPHEUS_ROOT_HOST:-"$(realpath --relative-to=${PWD} ${MORPHEUS_ROOT})"} + +FULL_VERSION=$(git describe --tags --abbrev=0) +MAJOR_VERSION=$(echo ${FULL_VERSION} | awk '{split($0, a, "[v.]"); print a[2]}') +MINOR_VERSION=$(echo ${FULL_VERSION} | awk '{split($0, a, "."); print a[2]}') +SHORT_VERSION=${MAJOR_VERSION}.${MINOR_VERSION} + +# Build args +FROM_IMAGE=${FROM_IMAGE:-"nvcr.io/nvidia/tritonserver"} +FROM_IMAGE_TAG=${FROM_IMAGE_TAG:-"23.06-py3"} + +DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-"nvcr.io/nvidia/morpheus/morpheus-tritonserver-models"} +DOCKER_IMAGE_TAG=${DOCKER_IMAGE_TAG:-"${SHORT_VERSION}"} + +DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""} + +# Ensure all models are fetched +"${MORPHEUS_ROOT}/scripts/fetch_data.py" fetch models + +# Build the docker arguments +DOCKER_ARGS="-t ${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG}" +DOCKER_ARGS="${DOCKER_ARGS} --build-arg FROM_IMAGE=${FROM_IMAGE}" +DOCKER_ARGS="${DOCKER_ARGS} --build-arg FROM_IMAGE_TAG=${FROM_IMAGE_TAG}" +DOCKER_ARGS="${DOCKER_ARGS} --network=host" + +# Last add any extra args (duplicates override earlier ones) +DOCKER_ARGS="${DOCKER_ARGS} ${DOCKER_EXTRA_ARGS}" + +# Export buildkit variable +export DOCKER_BUILDKIT=1 + +echo "Building ${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} with args..." +echo " FROM_IMAGE : ${FROM_IMAGE}" +echo " FROM_IMAGE_TAG : ${FROM_IMAGE_TAG}" + +echo "" +echo " COMMAND: docker build ${DOCKER_ARGS} -f ${SCRIPT_DIR}/Dockerfile ." +echo " Note: add '--progress plain' to DOCKER_EXTRA_ARGS to show all container build output" + +docker build ${DOCKER_ARGS} -f ${SCRIPT_DIR}/Dockerfile . diff --git a/examples/ransomware_detection/models/ransomw-model-long-rf/1/checkpoint.tl b/models/ransomware-models/ransomw-model-long-rf/checkpoint.tl similarity index 100% rename from examples/ransomware_detection/models/ransomw-model-long-rf/1/checkpoint.tl rename to models/ransomware-models/ransomw-model-long-rf/checkpoint.tl diff --git a/examples/ransomware_detection/models/ransomw-model-medium-rf/1/checkpoint.tl b/models/ransomware-models/ransomw-model-medium-rf/checkpoint.tl similarity index 100% rename from examples/ransomware_detection/models/ransomw-model-medium-rf/1/checkpoint.tl rename to models/ransomware-models/ransomw-model-medium-rf/checkpoint.tl diff --git a/examples/ransomware_detection/models/ransomw-model-short-rf/1/checkpoint.tl b/models/ransomware-models/ransomw-model-short-rf/checkpoint.tl similarity index 100% rename from examples/ransomware_detection/models/ransomw-model-short-rf/1/checkpoint.tl rename to models/ransomware-models/ransomw-model-short-rf/checkpoint.tl diff --git a/models/triton-model-repo/README.md b/models/triton-model-repo/README.md index b4d11b1829..07bce2c454 100644 --- a/models/triton-model-repo/README.md +++ b/models/triton-model-repo/README.md @@ -37,6 +37,22 @@ Sym links are used to minimize changes to the `config.pbtxt` files while still a The downside of using symlinks is that the entire Morpheus model repo must be volume mounted when launching Triton. Refer to the next section for information on how to correctly mount this repo, and select which models should be loaded.
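For reference, a minimal sketch of such a launch (the same pattern used in the getting-started guide; it assumes the command is run from the repository root so that the relative symlinks resolve inside the container):

```bash
docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \
  -v $PWD/models:/models \
  nvcr.io/nvidia/tritonserver:23.06-py3 \
  tritonserver --model-repository=/models/triton-model-repo \
  --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx
```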
+## Models Container +The models in this directory are available in a pre-built container image containing Triton Inference Server, along with the models themselves. The container image is available on NGC and can be pulled using the following command: +```bash +docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 +``` + +Those users who are working on training their own models have two options available: +1) Build the models container locally by running the following command from the root of the Morpheus repo: +```bash +./models/docker/build_container.sh +``` + +This option is good for users who have a model which has already been trained and is ready for deployment. For more information refer to the [README](./docker/README.md) in the `docker` directory. + +2) Using the Triton Docker image directly, and mounting the `models` directory into the container. This option is good for users who are iterating on a single model and do not wish to build the entire container each time. The rest of this document covers using this option. + ## Launching Triton To launch Triton with one of the models in `triton-model-repo`, this entire repo must be volume mounted into the container. Once the entire repository is mounted, the Triton options: `--model-repository` and `--load-model` can be selectively used to choose which models to load. The following are several examples on launching Triton with different models and different setups: diff --git a/models/triton-model-repo/ransomw-model-long-rf/1/checkpoint.tl b/models/triton-model-repo/ransomw-model-long-rf/1/checkpoint.tl new file mode 120000 index 0000000000..c76e3a000a --- /dev/null +++ b/models/triton-model-repo/ransomw-model-long-rf/1/checkpoint.tl @@ -0,0 +1 @@ +../../../ransomware-models/ransomw-model-long-rf/checkpoint.tl \ No newline at end of file diff --git a/examples/ransomware_detection/models/ransomw-model-long-rf/config.pbtxt b/models/triton-model-repo/ransomw-model-long-rf/config.pbtxt similarity index 100% rename from examples/ransomware_detection/models/ransomw-model-long-rf/config.pbtxt rename to models/triton-model-repo/ransomw-model-long-rf/config.pbtxt diff --git a/models/triton-model-repo/ransomw-model-medium-rf/1/checkpoint.tl b/models/triton-model-repo/ransomw-model-medium-rf/1/checkpoint.tl new file mode 120000 index 0000000000..753887c55b --- /dev/null +++ b/models/triton-model-repo/ransomw-model-medium-rf/1/checkpoint.tl @@ -0,0 +1 @@ +../../../ransomware-models/ransomw-model-medium-rf/checkpoint.tl \ No newline at end of file diff --git a/examples/ransomware_detection/models/ransomw-model-medium-rf/config.pbtxt b/models/triton-model-repo/ransomw-model-medium-rf/config.pbtxt similarity index 100% rename from examples/ransomware_detection/models/ransomw-model-medium-rf/config.pbtxt rename to models/triton-model-repo/ransomw-model-medium-rf/config.pbtxt diff --git a/models/triton-model-repo/ransomw-model-short-rf/1/checkpoint.tl b/models/triton-model-repo/ransomw-model-short-rf/1/checkpoint.tl new file mode 120000 index 0000000000..ca61fd3616 --- /dev/null +++ b/models/triton-model-repo/ransomw-model-short-rf/1/checkpoint.tl @@ -0,0 +1 @@ +../../../ransomware-models/ransomw-model-short-rf/checkpoint.tl \ No newline at end of file diff --git a/examples/ransomware_detection/models/ransomw-model-short-rf/config.pbtxt b/models/triton-model-repo/ransomw-model-short-rf/config.pbtxt similarity index 100% rename from examples/ransomware_detection/models/ransomw-model-short-rf/config.pbtxt rename to 
models/triton-model-repo/ransomw-model-short-rf/config.pbtxt diff --git a/scripts/fetch_data.py b/scripts/fetch_data.py index cf5b357bef..328a267713 100755 --- a/scripts/fetch_data.py +++ b/scripts/fetch_data.py @@ -23,6 +23,7 @@ LFS_DATASETS = { 'all': '**', + 'datasets': 'models/datasets/**', 'docs': 'docs/**', 'examples': 'examples/**', 'models': 'models/**', diff --git a/scripts/validation/val-globals.sh b/scripts/validation/val-globals.sh index 5fc9eb74eb..810748fb99 100755 --- a/scripts/validation/val-globals.sh +++ b/scripts/validation/val-globals.sh @@ -26,7 +26,7 @@ export e="\033[0;90m" export y="\033[0;33m" export x="\033[0m" -export TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:23.06-py3"} +export TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10"} # TRITON_GRPC_PORT is only used when TRITON_URL is undefined export TRITON_GRPC_PORT=${TRITON_GRPC_PORT:-"8001"} diff --git a/scripts/validation/val-utils.sh b/scripts/validation/val-utils.sh index c1475d5efa..8b8012dddc 100755 --- a/scripts/validation/val-utils.sh +++ b/scripts/validation/val-utils.sh @@ -67,9 +67,6 @@ function wait_for_triton { } function ensure_triton_running { - - TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:23.06-py3"} - IS_RUNNING=$(is_triton_running) if [[ "${IS_RUNNING}" = "0" ]]; then @@ -86,7 +83,7 @@ function ensure_triton_running { MODEL_VOLUME=${WORKSPACE_VOLUME:-${MORPHEUS_ROOT}} # Launch triton container in explicit mode - TRITON_IMG_ID=$(docker run --rm -ti -d --name=triton-validation --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v ${MODEL_VOLUME}/models:/models ${TRITON_IMAGE} tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit) + TRITON_IMG_ID=$(docker run --rm -ti -d --name=triton-validation --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 ${TRITON_IMAGE} tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit) TRITON_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ${TRITON_IMG_ID}) export TRITON_URL="${TRITON_IP}:${TRITON_GRPC_PORT}" diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index 905e6e30b5..7d6352c1ee 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -19,17 +19,17 @@ ### Set up Triton Inference Server ##### Pull Triton Inference Server Docker Image -Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) suitable for your environment. +Pull Morpheus Models Docker image from NGC. Example: ```bash -docker pull nvcr.io/nvidia/tritonserver:23.06-py3 +docker pull nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 ``` ##### Start Triton Inference Server container ```bash -docker run --gpus=all --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model sid-minibert-onnx --load-model abp-nvsmi-xgb --load-model phishing-bert-onnx --load-model all-MiniLM-L6-v2 +docker run --gpus=all --rm -p8000:8000 -p8001:8001 -p8002:8002 nvcr.io/nvidia/morpheus/morpheus-tritonserver-models:24.10 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model sid-minibert-onnx --load-model abp-nvsmi-xgb --load-model phishing-bert-onnx --load-model all-MiniLM-L6-v2 ``` ##### Verify Model Deployments
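One way to verify the deployments once the container is up is to query Triton's standard HTTP readiness endpoints (a hedged sketch; it assumes Triton's HTTP port 8000 is reachable on localhost, as in the run command above):

```bash
# Server-level readiness (expects HTTP 200 once Triton is up)
curl -s -o /dev/null -w "%{http_code}\n" localhost:8000/v2/health/ready

# Per-model readiness for the models loaded above
for model in sid-minibert-onnx abp-nvsmi-xgb phishing-bert-onnx all-MiniLM-L6-v2; do
  echo -n "${model}: "
  curl -s -o /dev/null -w "%{http_code}\n" "localhost:8000/v2/models/${model}/ready"
done
```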