Skip to content

Commit

Permalink
Merge branch 'branch-24.06' of https://github.com/nv-morpheus/Morpheus
Browse files Browse the repository at this point in the history
…into tz-gnn-fix
  • Loading branch information
tzemicheal committed May 16, 2024
2 parents 4ffeb4c + ee9d932 commit f3896d4
Show file tree
Hide file tree
Showing 60 changed files with 3,092 additions and 1,289 deletions.
5 changes: 5 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,9 @@

# Devcontainer base: RAPIDS devcontainers image (C++ toolchain + mambaforge) on Ubuntu 22.04.
# NOTE(review): tag is version-pinned but not digest-pinned — confirm whether
# digest pinning is wanted for reproducible builds.
FROM rapidsai/devcontainers:23.12-cpp-mambaforge-ubuntu22.04 AS base

# Enable DOCA support; the doca.sh script below exits early unless this is TRUE/ON.
ENV MORPHEUS_SUPPORT_DOCA=ON

# Destination path is relative, so it resolves under the WORKDIR inherited from
# the base image — TODO confirm the base image's WORKDIR.
COPY ./docker/optional_deps docker/optional_deps
# Install the DOCA SDK and gdrcopy, using /tmp/doca as the scratch/download dir.
RUN ./docker/optional_deps/doca.sh /tmp/doca

# Expose the devcontainer helper scripts on PATH at runtime.
ENV PATH="${PATH}:/workspaces/morpheus/.devcontainer/bin"
7 changes: 4 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"build": {
"dockerfile": "Dockerfile"
},
"privileged": true,

"hostRequirements": {
"gpu": true
Expand All @@ -33,7 +34,8 @@
],

"runArgs": [
"--network=morpheus"
"--network=morpheus",
"-v=/dev/hugepages:/dev/hugepages"
],

"containerEnv": {
Expand Down Expand Up @@ -93,8 +95,7 @@
"type": "bind",
"source": "${localWorkspaceFolder}/.devcontainer/opt/morpheus",
"target": "/opt/morpheus"
},

}
],

"features": {
Expand Down
64 changes: 64 additions & 0 deletions .devcontainer/docker/optional_deps/doca.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Installs the NVIDIA DOCA SDK (doca-all, doca-gpu, doca-gpu-dev) and the
# gdrcopy library into the image. No-op unless MORPHEUS_SUPPORT_DOCA is TRUE/ON.
#
# Usage: doca.sh <working-dir>
#   <working-dir>  scratch directory used for the downloaded .deb and the
#                  gdrcopy checkout

# -e: abort on error; -u: abort on unset vars; pipefail: fail pipelines early
set -euo pipefail

MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF}
LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
LINUX_VER=${LINUX_VER:-22.04}
DOCA_VERSION=${DOCA_VERSION:-2.7.0}

# Exit early if nothing to do
if [[ ${MORPHEUS_SUPPORT_DOCA} != @(TRUE|ON) ]]; then
   exit 0
fi

# Fail fast with a usage message if the working directory was not supplied
WORKING_DIR=${1:?"usage: $0 <working-dir>"}

echo "Installing DOCA using directory: ${WORKING_DIR}"

DEB_DIR=${WORKING_DIR}/deb

mkdir -p "${DEB_DIR}"

DOCA_OS_VERSION="ubuntu2204"
DOCA_PKG_LINK="https://www.mellanox.com/downloads/DOCA/DOCA_v${DOCA_VERSION}/host/doca-host_${DOCA_VERSION}-204000-24.04-${DOCA_OS_VERSION}_amd64.deb"

# Upgrade the base packages (diff between image and Canonical upstream repo).
# apt-get (not apt) is used throughout: apt's CLI is not script-stable.
apt-get update -y
apt-get upgrade -y

# Install wget, needed to fetch the doca-host repo-setup package
apt-get install -y --no-install-recommends wget

# Download the doca-host package into DEB_DIR and install it. The original
# invocation passed both `-qO -` and `-O doca-host.deb`; the first -O was
# silently overridden, and the file was written to the cwd instead of DEB_DIR.
wget -q -O "${DEB_DIR}/doca-host.deb" "${DOCA_PKG_LINK}"
# -y is required: without it a non-interactive docker build would hang on the
# confirmation prompt.
apt-get install -y "${DEB_DIR}/doca-host.deb"

# doca-host registers a new apt repository; refresh and install the DOCA stacks
apt-get update -y
apt-get install -y doca-all
apt-get install -y doca-gpu doca-gpu-dev

# Drop the apt lists so they do not bloat the image layer
rm -rf /var/lib/apt/lists/*

# Now install the gdrcopy library according to: https://github.com/NVIDIA/gdrcopy
GDRCOPY_DIR=${WORKING_DIR}/gdrcopy

if [[ ! -d "${GDRCOPY_DIR}" ]] ; then
    git clone https://github.com/NVIDIA/gdrcopy.git "${GDRCOPY_DIR}"
    cd "${GDRCOPY_DIR}"
else
    cd "${GDRCOPY_DIR}"
    git pull https://github.com/NVIDIA/gdrcopy.git
fi

make lib lib_install
63 changes: 10 additions & 53 deletions docker/optional_deps/doca.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ set -e
MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF}
LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
LINUX_VER=${LINUX_VER:-22.04}
DOCA_VERSION=${DOCA_VERSION:-2.6.0}
DOCA_VERSION=${DOCA_VERSION:-2.7.0}

# Exit early if nothing to do
if [[ ${MORPHEUS_SUPPORT_DOCA} != @(TRUE|ON) ]]; then
Expand All @@ -34,64 +34,21 @@ DEB_DIR=${WORKING_DIR}/deb

mkdir -p ${DEB_DIR}

DOCA_REPO_LINK="https://linux.mellanox.com/public/repo/doca/${DOCA_VERSION}"
DOCA_REPO="${DOCA_REPO_LINK}/ubuntu22.04"
DOCA_REPO_ARCH="x86_64"
DOCA_UPSTREAM_REPO="${DOCA_REPO}/${DOCA_REPO_ARCH}"
DOCA_OS_VERSION="ubuntu2204"
DOCA_PKG_LINK="https://www.mellanox.com/downloads/DOCA/DOCA_v${DOCA_VERSION}/host/doca-host_${DOCA_VERSION}-204000-24.04-${DOCA_OS_VERSION}_amd64.deb"

# Upgrade the base packages (diff between image and Canonical upstream repo)
apt update -y
apt upgrade -y

# Cleanup apt
rm -rf /var/lib/apt/lists/*
apt autoremove -y
# Install wget
apt install -y --no-install-recommends wget

# Configure DOCA Repository, and install packages
apt update -y

# Install wget & Add the DOCA public repository
apt install -y --no-install-recommends wget software-properties-common gpg-agent
wget -qO - ${DOCA_UPSTREAM_REPO}/GPG-KEY-Mellanox.pub | apt-key add -
add-apt-repository "deb [trusted=yes] ${DOCA_UPSTREAM_REPO} ./"
apt update -y

# Install base-rt content
apt install -y --no-install-recommends \
doca-gpu \
doca-gpu-dev \
doca-prime-runtime \
doca-prime-sdk \
doca-sdk \
dpcp \
flexio \
ibacm \
ibverbs-utils \
librdmacm1 \
libibnetdisc5 \
libibumad3 \
libibmad5 \
libopensm \
libopenvswitch \
libyara8 \
mlnx-tools \
ofed-scripts \
openmpi \
openvswitch-common \
openvswitch-switch \
srptools \
mlnx-ethtool \
mlnx-iproute2 \
python3-pyverbs \
rdma-core \
ucx \
yara

# Cleanup apt
rm -rf /usr/lib/python3/dist-packages
apt remove -y software-properties-common gpg-agent
rm -rf /var/lib/apt/lists/*
apt autoremove -y
wget -qO - ${DOCA_PKG_LINK} -O doca-host.deb
apt install ./doca-host.deb
apt update
apt install -y doca-all
apt install -y doca-gpu doca-gpu-dev

# Now install the gdrcopy library according to: https://github.com/NVIDIA/gdrcopy
GDRCOPY_DIR=${WORKING_DIR}/gdrcopy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ limitations under the License.
- [DFP Post Processing](#dfp-post-processing)
- [Serialize](#serialize)
- [Write to File](#write-to-file)
- [Running Example Modular DFP Pipelines](#running-example-modular-dfp-pipelines)
- [System requirements](#system-requirements)
- [Building the services](#building-the-services)
- [Downloading the example datasets](#downloading-the-example-datasets)
- [Run Morpheus pipeline](#run-morpheus-pipeline)
- [Output Fields](#output-fields)

## Introduction

Expand Down Expand Up @@ -522,7 +528,7 @@ pip install s3fs
python examples/digital_fingerprinting/fetch_example_data.py all
```
### Morpheus Pipeline
### Run Morpheus pipeline
From the `examples/digital_fingerprinting/production` dir, run:
```bash
docker compose run morpheus_pipeline bash
Expand Down
3 changes: 2 additions & 1 deletion docs/source/stages/morpheus_stages.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Stages are the building blocks of Morpheus pipelines. Below is a list of the mos

## Doca

- Doca Stage {py:class}`~morpheus.stages.doca.doca_source_stage.DocaSourceStage` A source stage used to receive raw packet data from a ConnectX-6 Dx NIC. This stage is not compiled by default refer to the [Doca Example](../../../examples/doca/README.md) for details on building this stage.
- Doca Source Stage {py:class}`~morpheus.stages.doca.doca_source_stage.DocaSourceStage` A source stage used to receive raw packet data in GPU memory from a ConnectX NIC, using the DOCA GPUNetIO functions within a CUDA kernel to receive and process Ethernet network packets. Received packet information is passed to the next pipeline stage in the form of a RawPacketMessage. This stage is not compiled by default; refer to the [Doca Example](../../../examples/doca/README.md) for details on building this stage.
- Doca Convert Stage {py:class}`~morpheus.stages.doca.doca_convert_stage.DocaConvertStage` Converts the RawPacketMessage emitted by the DOCA Source Stage into the more complex MetaMessage format. Packet data never leaves GPU memory. This stage is not compiled by default; refer to the [Doca Example](../../../examples/doca/README.md) for details on building this stage.

## General

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def process_task(control_message: ControlMessage):
if (model_cache is None):
raise RuntimeError(f"Could not find model for user {user_id}")

loaded_model = model_cache.load_model(client)
loaded_model = model_cache.load_model()

# TODO(Devin): Recovery strategy should be more robust/configurable in practice
except Exception as exec_info:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def on_data(self, message: MultiDFPMessage) -> MultiDFPMessage:
if (model_cache is None):
raise RuntimeError(f"Could not find model for user {user_id}")

loaded_model = model_cache.load_model(self._client)
loaded_model = model_cache.load_model()

except Exception:
logger.exception("Error trying to get model", exc_info=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def last_used(self):
def last_checked(self):
return self._last_checked

def load_model(self, _) -> AutoEncoder:
def load_model(self) -> AutoEncoder:

now = datetime.now()

Expand Down
103 changes: 44 additions & 59 deletions examples/doca/run.py → examples/doca/run_tcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from morpheus.config import CppConfig
from morpheus.config import PipelineModes
from morpheus.pipeline.linear_pipeline import LinearPipeline
from morpheus.stages.doca.doca_convert_stage import DocaConvertStage
from morpheus.stages.doca.doca_source_stage import DocaSourceStage
from morpheus.stages.general.monitor_stage import MonitorStage
from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage
Expand Down Expand Up @@ -66,18 +67,7 @@
help="GPU PCI Address",
required=True,
)
@click.option(
"--traffic_type",
help="UDP or TCP traffic",
required=True,
)
def run_pipeline(pipeline_batch_size,
model_max_batch_size,
model_fea_length,
out_file,
nic_addr,
gpu_addr,
traffic_type):
def run_pipeline(pipeline_batch_size, model_max_batch_size, model_fea_length, out_file, nic_addr, gpu_addr):
# Enable the default logger
configure_logging(log_level=logging.DEBUG)

Expand All @@ -87,7 +77,7 @@ def run_pipeline(pipeline_batch_size,
config.mode = PipelineModes.NLP

# Below properties are specified by the command line
config.num_threads = 4
config.num_threads = 1
config.pipeline_batch_size = pipeline_batch_size
config.model_max_batch_size = model_max_batch_size
config.feature_length = model_fea_length
Expand All @@ -111,52 +101,47 @@ def run_pipeline(pipeline_batch_size,
pipeline = LinearPipeline(config)

# add doca source stage
pipeline.set_source(DocaSourceStage(config, nic_addr, gpu_addr, traffic_type))

if traffic_type == 'udp':
pipeline.add_stage(MonitorStage(config, description="DOCA GPUNetIO rate", unit='pkts'))

if traffic_type == 'tcp':
# add deserialize stage
pipeline.add_stage(DeserializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Deserialize rate", unit='pkts'))

hashfile = '/workspace/models/training-tuning-scripts/sid-models/resources/bert-base-uncased-hash.txt'

# add preprocessing stage
pipeline.add_stage(
PreprocessNLPStage(config,
vocab_hash_file=hashfile,
do_lower_case=True,
truncation=True,
add_special_tokens=False,
column='data'))

pipeline.add_stage(MonitorStage(config, description="Tokenize rate", unit='pkts'))

# add inference stage
pipeline.add_stage(
TritonInferenceStage(
config,
# model_name="sid-minibert-trt",
model_name="sid-minibert-onnx",
server_url="localhost:8000",
force_convert_inputs=True,
use_shared_memory=True))

pipeline.add_stage(MonitorStage(config, description="Inference rate", unit='pkts'))

# add class stage
pipeline.add_stage(AddClassificationsStage(config))
pipeline.add_stage(MonitorStage(config, description="AddClass rate", unit='pkts'))

# serialize
pipeline.add_stage(SerializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Serialize rate", unit='pkts'))

# write to file
pipeline.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True))
pipeline.add_stage(MonitorStage(config, description="Write to file rate", unit='pkts'))
pipeline.set_source(DocaSourceStage(config, nic_addr, gpu_addr, 'tcp'))
pipeline.add_stage(DocaConvertStage(config))

# add deserialize stage
pipeline.add_stage(DeserializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Deserialize rate", unit='pkts'))

hashfile = 'data/bert-base-uncased-hash.txt'

# add preprocessing stage
pipeline.add_stage(
PreprocessNLPStage(config,
vocab_hash_file=hashfile,
do_lower_case=True,
truncation=True,
add_special_tokens=False,
column='data'))

pipeline.add_stage(MonitorStage(config, description="Tokenize rate", unit='pkts'))

# add inference stage
pipeline.add_stage(
TritonInferenceStage(config,
model_name="sid-minibert-onnx",
server_url="localhost:8000",
force_convert_inputs=True,
use_shared_memory=True))

pipeline.add_stage(MonitorStage(config, description="Inference rate", unit='pkts'))

# add class stage
pipeline.add_stage(AddClassificationsStage(config))
pipeline.add_stage(MonitorStage(config, description="AddClass rate", unit='pkts'))

# serialize
pipeline.add_stage(SerializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Serialize rate", unit='pkts'))

# write to file
pipeline.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True))
pipeline.add_stage(MonitorStage(config, description="Write to file rate", unit='pkts'))

# Build the pipeline here to see types in the vizualization
pipeline.build()
Expand Down
Loading

0 comments on commit f3896d4

Please sign in to comment.