Skip to content

Commit

Permalink
Merge branch 'branch-24.06' of https://github.com/nv-morpheus/Morpheus
Browse files Browse the repository at this point in the history
…into tz-gnn-fix
  • Loading branch information
tzemicheal committed May 16, 2024
2 parents 4ffeb4c + ee9d932 commit f3896d4
Show file tree
Hide file tree
Showing 60 changed files with 3,092 additions and 1,289 deletions.
5 changes: 5 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,9 @@

# Devcontainer base: RAPIDS devcontainers image (C++ toolchain + mambaforge) on Ubuntu 22.04.
# NOTE(review): tag is version-pinned but not digest-pinned — confirm whether
# digest pinning is wanted for reproducible builds.
FROM rapidsai/devcontainers:23.12-cpp-mambaforge-ubuntu22.04 AS base

# Enable DOCA support; the doca.sh script below exits early unless this is TRUE/ON.
ENV MORPHEUS_SUPPORT_DOCA=ON

# Destination path is relative, so it resolves under the WORKDIR inherited from
# the base image — TODO confirm the base image's WORKDIR.
COPY ./docker/optional_deps docker/optional_deps
# Install the DOCA SDK and gdrcopy, using /tmp/doca as the scratch/download dir.
RUN ./docker/optional_deps/doca.sh /tmp/doca

# Expose the devcontainer helper scripts on PATH at runtime.
ENV PATH="${PATH}:/workspaces/morpheus/.devcontainer/bin"
7 changes: 4 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"build": {
"dockerfile": "Dockerfile"
},
"privileged": true,

"hostRequirements": {
"gpu": true
Expand All @@ -33,7 +34,8 @@
],

"runArgs": [
"--network=morpheus"
"--network=morpheus",
"-v=/dev/hugepages:/dev/hugepages"
],

"containerEnv": {
Expand Down Expand Up @@ -93,8 +95,7 @@
"type": "bind",
"source": "${localWorkspaceFolder}/.devcontainer/opt/morpheus",
"target": "/opt/morpheus"
},

}
],

"features": {
Expand Down
64 changes: 64 additions & 0 deletions .devcontainer/docker/optional_deps/doca.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Installs the NVIDIA DOCA SDK (doca-all, doca-gpu, doca-gpu-dev) and the
# gdrcopy library into the image. No-op unless MORPHEUS_SUPPORT_DOCA is TRUE/ON.
#
# Usage: doca.sh <working-dir>
#   <working-dir>  scratch directory used for the downloaded .deb and the
#                  gdrcopy checkout

# -e: abort on error; -u: abort on unset vars; pipefail: fail pipelines early
set -euo pipefail

MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF}
LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
LINUX_VER=${LINUX_VER:-22.04}
DOCA_VERSION=${DOCA_VERSION:-2.7.0}

# Exit early if nothing to do
if [[ ${MORPHEUS_SUPPORT_DOCA} != @(TRUE|ON) ]]; then
   exit 0
fi

# Fail fast with a usage message if the working directory was not supplied
WORKING_DIR=${1:?"usage: $0 <working-dir>"}

echo "Installing DOCA using directory: ${WORKING_DIR}"

DEB_DIR=${WORKING_DIR}/deb

mkdir -p "${DEB_DIR}"

DOCA_OS_VERSION="ubuntu2204"
DOCA_PKG_LINK="https://www.mellanox.com/downloads/DOCA/DOCA_v${DOCA_VERSION}/host/doca-host_${DOCA_VERSION}-204000-24.04-${DOCA_OS_VERSION}_amd64.deb"

# Upgrade the base packages (diff between image and Canonical upstream repo).
# apt-get (not apt) is used throughout: apt's CLI is not script-stable.
apt-get update -y
apt-get upgrade -y

# Install wget, needed to fetch the doca-host repo-setup package
apt-get install -y --no-install-recommends wget

# Download the doca-host package into DEB_DIR and install it. The original
# invocation passed both `-qO -` and `-O doca-host.deb`; the first -O was
# silently overridden, and the file was written to the cwd instead of DEB_DIR.
wget -q -O "${DEB_DIR}/doca-host.deb" "${DOCA_PKG_LINK}"
# -y is required: without it a non-interactive docker build would hang on the
# confirmation prompt.
apt-get install -y "${DEB_DIR}/doca-host.deb"

# doca-host registers a new apt repository; refresh and install the DOCA stacks
apt-get update -y
apt-get install -y doca-all
apt-get install -y doca-gpu doca-gpu-dev

# Drop the apt lists so they do not bloat the image layer
rm -rf /var/lib/apt/lists/*

# Now install the gdrcopy library according to: https://github.com/NVIDIA/gdrcopy
GDRCOPY_DIR=${WORKING_DIR}/gdrcopy

if [[ ! -d "${GDRCOPY_DIR}" ]] ; then
    git clone https://github.com/NVIDIA/gdrcopy.git "${GDRCOPY_DIR}"
    cd "${GDRCOPY_DIR}"
else
    cd "${GDRCOPY_DIR}"
    git pull https://github.com/NVIDIA/gdrcopy.git
fi

make lib lib_install
63 changes: 10 additions & 53 deletions docker/optional_deps/doca.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ set -e
MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF}
LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
LINUX_VER=${LINUX_VER:-22.04}
DOCA_VERSION=${DOCA_VERSION:-2.6.0}
DOCA_VERSION=${DOCA_VERSION:-2.7.0}

# Exit early if nothing to do
if [[ ${MORPHEUS_SUPPORT_DOCA} != @(TRUE|ON) ]]; then
Expand All @@ -34,64 +34,21 @@ DEB_DIR=${WORKING_DIR}/deb

mkdir -p ${DEB_DIR}

DOCA_REPO_LINK="https://linux.mellanox.com/public/repo/doca/${DOCA_VERSION}"
DOCA_REPO="${DOCA_REPO_LINK}/ubuntu22.04"
DOCA_REPO_ARCH="x86_64"
DOCA_UPSTREAM_REPO="${DOCA_REPO}/${DOCA_REPO_ARCH}"
DOCA_OS_VERSION="ubuntu2204"
DOCA_PKG_LINK="https://www.mellanox.com/downloads/DOCA/DOCA_v${DOCA_VERSION}/host/doca-host_${DOCA_VERSION}-204000-24.04-${DOCA_OS_VERSION}_amd64.deb"

# Upgrade the base packages (diff between image and Canonical upstream repo)
apt update -y
apt upgrade -y

# Cleanup apt
rm -rf /var/lib/apt/lists/*
apt autoremove -y
# Install wget
apt install -y --no-install-recommends wget

# Configure DOCA Repository, and install packages
apt update -y

# Install wget & Add the DOCA public repository
apt install -y --no-install-recommends wget software-properties-common gpg-agent
wget -qO - ${DOCA_UPSTREAM_REPO}/GPG-KEY-Mellanox.pub | apt-key add -
add-apt-repository "deb [trusted=yes] ${DOCA_UPSTREAM_REPO} ./"
apt update -y

# Install base-rt content
apt install -y --no-install-recommends \
doca-gpu \
doca-gpu-dev \
doca-prime-runtime \
doca-prime-sdk \
doca-sdk \
dpcp \
flexio \
ibacm \
ibverbs-utils \
librdmacm1 \
libibnetdisc5 \
libibumad3 \
libibmad5 \
libopensm \
libopenvswitch \
libyara8 \
mlnx-tools \
ofed-scripts \
openmpi \
openvswitch-common \
openvswitch-switch \
srptools \
mlnx-ethtool \
mlnx-iproute2 \
python3-pyverbs \
rdma-core \
ucx \
yara

# Cleanup apt
rm -rf /usr/lib/python3/dist-packages
apt remove -y software-properties-common gpg-agent
rm -rf /var/lib/apt/lists/*
apt autoremove -y
wget -qO - ${DOCA_PKG_LINK} -O doca-host.deb
apt install ./doca-host.deb
apt update
apt install -y doca-all
apt install -y doca-gpu doca-gpu-dev

# Now install the gdrcopy library according to: https://github.com/NVIDIA/gdrcopy
GDRCOPY_DIR=${WORKING_DIR}/gdrcopy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ limitations under the License.
- [DFP Post Processing](#dfp-post-processing)
- [Serialize](#serialize)
- [Write to File](#write-to-file)
- [Running Example Modular DFP Pipelines](#running-example-modular-dfp-pipelines)
- [System requirements](#system-requirements)
- [Building the services](#building-the-services)
- [Downloading the example datasets](#downloading-the-example-datasets)
- [Run Morpheus pipeline](#run-morpheus-pipeline)
- [Output Fields](#output-fields)

## Introduction

Expand Down Expand Up @@ -522,7 +528,7 @@ pip install s3fs
python examples/digital_fingerprinting/fetch_example_data.py all
```
### Morpheus Pipeline
### Run Morpheus pipeline
From the `examples/digital_fingerprinting/production` dir, run:
```bash
docker compose run morpheus_pipeline bash
Expand Down
3 changes: 2 additions & 1 deletion docs/source/stages/morpheus_stages.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Stages are the building blocks of Morpheus pipelines. Below is a list of the mos

## Doca

- Doca Stage {py:class}`~morpheus.stages.doca.doca_source_stage.DocaSourceStage` A source stage used to receive raw packet data from a ConnectX-6 Dx NIC. This stage is not compiled by default refer to the [Doca Example](../../../examples/doca/README.md) for details on building this stage.
- Doca Source Stage {py:class}`~morpheus.stages.doca.doca_source_stage.DocaSourceStage` A source stage used to receive raw packet data in GPU memory from a ConnectX NIC, using the DOCA GPUNetIO functions within a CUDA kernel to receive and process Ethernet network packets. Received packet information is passed to the next pipeline stage in the form of a RawPacketMessage. This stage is not compiled by default; refer to the [Doca Example](../../../examples/doca/README.md) for details on building this stage.
- Doca Convert Stage {py:class}`~morpheus.stages.doca.doca_convert_stage.DocaConvertStage` Converts the RawPacketMessage emitted by the DOCA Source Stage into the more complex MetaMessage format. Packet data never leaves GPU memory. This stage is not compiled by default; refer to the [Doca Example](../../../examples/doca/README.md) for details on building this stage.

## General

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def process_task(control_message: ControlMessage):
if (model_cache is None):
raise RuntimeError(f"Could not find model for user {user_id}")

loaded_model = model_cache.load_model(client)
loaded_model = model_cache.load_model()

# TODO(Devin): Recovery strategy should be more robust/configurable in practice
except Exception as exec_info:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def on_data(self, message: MultiDFPMessage) -> MultiDFPMessage:
if (model_cache is None):
raise RuntimeError(f"Could not find model for user {user_id}")

loaded_model = model_cache.load_model(self._client)
loaded_model = model_cache.load_model()

except Exception:
logger.exception("Error trying to get model", exc_info=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def last_used(self):
def last_checked(self):
return self._last_checked

def load_model(self, _) -> AutoEncoder:
def load_model(self) -> AutoEncoder:

now = datetime.now()

Expand Down
103 changes: 44 additions & 59 deletions examples/doca/run.py → examples/doca/run_tcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from morpheus.config import CppConfig
from morpheus.config import PipelineModes
from morpheus.pipeline.linear_pipeline import LinearPipeline
from morpheus.stages.doca.doca_convert_stage import DocaConvertStage
from morpheus.stages.doca.doca_source_stage import DocaSourceStage
from morpheus.stages.general.monitor_stage import MonitorStage
from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage
Expand Down Expand Up @@ -66,18 +67,7 @@
help="GPU PCI Address",
required=True,
)
@click.option(
"--traffic_type",
help="UDP or TCP traffic",
required=True,
)
def run_pipeline(pipeline_batch_size,
model_max_batch_size,
model_fea_length,
out_file,
nic_addr,
gpu_addr,
traffic_type):
def run_pipeline(pipeline_batch_size, model_max_batch_size, model_fea_length, out_file, nic_addr, gpu_addr):
# Enable the default logger
configure_logging(log_level=logging.DEBUG)

Expand All @@ -87,7 +77,7 @@ def run_pipeline(pipeline_batch_size,
config.mode = PipelineModes.NLP

# Below properties are specified by the command line
config.num_threads = 4
config.num_threads = 1
config.pipeline_batch_size = pipeline_batch_size
config.model_max_batch_size = model_max_batch_size
config.feature_length = model_fea_length
Expand All @@ -111,52 +101,47 @@ def run_pipeline(pipeline_batch_size,
pipeline = LinearPipeline(config)

# add doca source stage
pipeline.set_source(DocaSourceStage(config, nic_addr, gpu_addr, traffic_type))

if traffic_type == 'udp':
pipeline.add_stage(MonitorStage(config, description="DOCA GPUNetIO rate", unit='pkts'))

if traffic_type == 'tcp':
# add deserialize stage
pipeline.add_stage(DeserializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Deserialize rate", unit='pkts'))

hashfile = '/workspace/models/training-tuning-scripts/sid-models/resources/bert-base-uncased-hash.txt'

# add preprocessing stage
pipeline.add_stage(
PreprocessNLPStage(config,
vocab_hash_file=hashfile,
do_lower_case=True,
truncation=True,
add_special_tokens=False,
column='data'))

pipeline.add_stage(MonitorStage(config, description="Tokenize rate", unit='pkts'))

# add inference stage
pipeline.add_stage(
TritonInferenceStage(
config,
# model_name="sid-minibert-trt",
model_name="sid-minibert-onnx",
server_url="localhost:8000",
force_convert_inputs=True,
use_shared_memory=True))

pipeline.add_stage(MonitorStage(config, description="Inference rate", unit='pkts'))

# add class stage
pipeline.add_stage(AddClassificationsStage(config))
pipeline.add_stage(MonitorStage(config, description="AddClass rate", unit='pkts'))

# serialize
pipeline.add_stage(SerializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Serialize rate", unit='pkts'))

# write to file
pipeline.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True))
pipeline.add_stage(MonitorStage(config, description="Write to file rate", unit='pkts'))
pipeline.set_source(DocaSourceStage(config, nic_addr, gpu_addr, 'tcp'))
pipeline.add_stage(DocaConvertStage(config))

# add deserialize stage
pipeline.add_stage(DeserializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Deserialize rate", unit='pkts'))

hashfile = 'data/bert-base-uncased-hash.txt'

# add preprocessing stage
pipeline.add_stage(
PreprocessNLPStage(config,
vocab_hash_file=hashfile,
do_lower_case=True,
truncation=True,
add_special_tokens=False,
column='data'))

pipeline.add_stage(MonitorStage(config, description="Tokenize rate", unit='pkts'))

# add inference stage
pipeline.add_stage(
TritonInferenceStage(config,
model_name="sid-minibert-onnx",
server_url="localhost:8000",
force_convert_inputs=True,
use_shared_memory=True))

pipeline.add_stage(MonitorStage(config, description="Inference rate", unit='pkts'))

# add class stage
pipeline.add_stage(AddClassificationsStage(config))
pipeline.add_stage(MonitorStage(config, description="AddClass rate", unit='pkts'))

# serialize
pipeline.add_stage(SerializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Serialize rate", unit='pkts'))

# write to file
pipeline.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True))
pipeline.add_stage(MonitorStage(config, description="Write to file rate", unit='pkts'))

# Build the pipeline here to see types in the vizualization
pipeline.build()
Expand Down
Loading

0 comments on commit f3896d4

Please sign in to comment.