diff --git a/.github/workflows/ci_pipe.yml b/.github/workflows/ci_pipe.yml index 3482fd2b8d..0d4bb14c7d 100644 --- a/.github/workflows/ci_pipe.yml +++ b/.github/workflows/ci_pipe.yml @@ -21,16 +21,12 @@ on: run_check: required: true type: boolean - conda_core_run_build: - description: 'Runs the stage to build the morpheus-core conda package' - required: true - type: boolean conda_upload_label: description: 'The label to use when uploading the morpheus conda packages. Leave empty to disable uploading' required: true type: string conda_run_build: - description: 'Runs the conda-build stage to build the conda package with all morpheus components' + description: 'Runs the conda-build stage to build the morpheus conda packages' required: true type: boolean container: @@ -204,9 +200,9 @@ jobs: shell: bash run: ./morpheus/ci/scripts/github/docs.sh - package-core: - name: Package Core - if: ${{ inputs.conda_core_run_build }} + package: + name: Conda Package + if: ${{ inputs.conda_run_build }} needs: [documentation, test] runs-on: linux-amd64-cpu16 timeout-minutes: 60 @@ -240,38 +236,4 @@ jobs: CONDA_TOKEN: "${{ secrets.CONDA_TOKEN }}" SCRIPT_ARGS: "${{ inputs.conda_upload_label != '' && 'upload' || '' }}" CONDA_PKG_LABEL: "${{ inputs.conda_upload_label }}" - run: ./morpheus/ci/scripts/github/conda_core.sh $SCRIPT_ARGS - - package: - name: Package All - if: ${{ inputs.conda_run_build }} - needs: [check, documentation, test] - runs-on: linux-amd64-cpu16 - timeout-minutes: 60 - container: - credentials: - username: '$oauthtoken' - password: ${{ secrets.NGC_API_KEY }} - image: ${{ inputs.container }} - strategy: - fail-fast: true - - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - lfs: false - path: 'morpheus' - fetch-depth: 0 - submodules: 'recursive' - - - name: Get AWS credentials using OIDC - uses: aws-actions/configure-aws-credentials@v1-node16 - with: - role-to-assume: ${{ vars.AWS_ROLE_ARN }} - aws-region: ${{ vars.AWS_REGION }} - role-duration-seconds: 
43200 # 12h - - - name: conda - shell: bash - run: ./morpheus/ci/scripts/github/conda.sh + run: ./morpheus/ci/scripts/github/conda_libs.sh $SCRIPT_ARGS diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 097d3c9e69..07ec5ca9e9 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -68,7 +68,6 @@ jobs: is_main_branch: ${{ github.ref_name == 'main' }} is_dev_branch: ${{ startsWith(github.ref_name, 'branch-') }} has_conda_build_label: ${{ steps.get-pr-info.outcome == 'success' && contains(fromJSON(steps.get-pr-info.outputs.pr-info).labels.*.name, 'conda-build') || false }} - has_conda_core_build_label: ${{ steps.get-pr-info.outcome == 'success' && contains(fromJSON(steps.get-pr-info.outputs.pr-info).labels.*.name, 'conda-core-build') || false }} has_skip_ci_label: ${{ steps.get-pr-info.outcome == 'success' && contains(fromJSON(steps.get-pr-info.outputs.pr-info).labels.*.name, 'skip-ci') || false }} pr_info: ${{ steps.get-pr-info.outcome == 'success' && steps.get-pr-info.outputs.pr-info || '' }} @@ -90,14 +89,11 @@ jobs: with: # Run checks for any PR branch run_check: ${{ fromJSON(needs.prepare.outputs.is_pr) }} - # Build morpheus-core conda package. This is done for main/dev branches and - # for PRs with the conda-core-build label - conda_core_run_build: ${{ !fromJSON(needs.prepare.outputs.is_pr) || fromJSON(needs.prepare.outputs.has_conda_core_build_label) }} - # Upload morpheus-core conda package only for non PR branches. Use 'main' for main branch and 'dev' for all other branches - conda_upload_label: ${{ !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) && 'main' || 'dev') || '' }} - # Run morpheus conda package, with all components. This is done for main/dev - # branches and for PRs with the conda-build label. + # Build conda packages for all the morpheus libraries - core, dfp, llm. 
This is + # done for main/dev branches and for PRs with the conda-build label conda_run_build: ${{ !fromJSON(needs.prepare.outputs.is_pr) || fromJSON(needs.prepare.outputs.has_conda_build_label) }} + # Upload morpheus conda packages only for non PR branches. Use 'main' for main branch and 'dev' for all other branches + conda_upload_label: ${{ !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) && 'main' || 'dev') || '' }} container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-240614 test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-240614 secrets: diff --git a/CMakeLists.txt b/CMakeLists.txt index a295b5ac5d..d50f872e85 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,9 @@ option(BUILD_SHARED_LIBS "Default value for whether or not to build shared or st option(MORPHEUS_BUILD_BENCHMARKS "Whether or not to build benchmarks" OFF) option(MORPHEUS_BUILD_DOCS "Enable building of API documentation" OFF) option(MORPHEUS_BUILD_EXAMPLES "Whether or not to build examples" OFF) -option(MORPHEUS_BUILD_MORPHEUS_LLM "Whether or not to build morpheus_llm" OFF) +option(MORPHEUS_BUILD_MORPHEUS_CORE "Whether or not to build morpheus_core" ON) +option(MORPHEUS_BUILD_MORPHEUS_DFP "Whether or not to build morpheus_dfp" ON) +option(MORPHEUS_BUILD_MORPHEUS_LLM "Whether or not to build morpheus_llm" ON) option(MORPHEUS_BUILD_TESTS "Whether or not to build tests" OFF) option(MORPHEUS_ENABLE_DEBUG_INFO "Enable printing debug information" OFF) option(MORPHEUS_PYTHON_BUILD_STUBS "Whether or not to generated .pyi stub files for C++ Python modules. 
Disable to avoid requiring loading the NVIDIA GPU Driver during build" ON) diff --git a/ci/conda/recipes/morpheus-core/morpheus_core_build.sh b/ci/conda/recipes/morpheus-libs/cmake_common.sh similarity index 61% rename from ci/conda/recipes/morpheus-core/morpheus_core_build.sh rename to ci/conda/recipes/morpheus-libs/cmake_common.sh index 5ef4920b9e..dcfa69b13f 100644 --- a/ci/conda/recipes/morpheus-core/morpheus_core_build.sh +++ b/ci/conda/recipes/morpheus-libs/cmake_common.sh @@ -1,3 +1,4 @@ + # SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # @@ -16,26 +17,10 @@ # It is assumed that this script is executed from the root of the repo directory by conda-build # (https://conda-forge.org/docs/maintainer/knowledge_base.html#using-cmake) -# Need to ensure this value is set before checking it in the if block -MORPHEUS_SUPPORT_DOCA=-OFF -MORPHEUS_BUILD_MORPHEUS_LLM=-OFF - # This will store all of the cmake args. Make sure to prepend args to allow # incoming values to overwrite them -CMAKE_ARGS=${CMAKE_ARGS:-""} - -export CCACHE_BASEDIR=$(realpath ${SRC_DIR}/..) -export USE_SCCACHE=${USE_SCCACHE:-""} - -# Check for some mrc environment variables. 
Append to front of args to allow users to overwrite them -if [[ -n "${MORPHEUS_CACHE_DIR}" ]]; then - # Set the cache variable, then set the Staging prefix to allow for host searching - CMAKE_ARGS="-DMORPHEUS_CACHE_DIR=${MORPHEUS_CACHE_DIR} ${CMAKE_ARGS}" - - # Double check that the cache dir has been created - mkdir -p ${MORPHEUS_CACHE_DIR} -fi +# CMake flags common across all libraries CMAKE_ARGS="-DCMAKE_MESSAGE_CONTEXT_SHOW=ON ${CMAKE_ARGS}" CMAKE_ARGS="-DCMAKE_INSTALL_PREFIX=$PREFIX ${CMAKE_ARGS}" CMAKE_ARGS="-DCMAKE_INSTALL_LIBDIR=lib ${CMAKE_ARGS}" @@ -51,38 +36,12 @@ CMAKE_ARGS="-DPython_EXECUTABLE=${PYTHON} ${CMAKE_ARGS}" CMAKE_ARGS="-DPYTHON_EXECUTABLE=${PYTHON} ${CMAKE_ARGS}" # for pybind11 CMAKE_ARGS="--log-level=VERBOSE ${CMAKE_ARGS}" -if [[ "${USE_SCCACHE}" == "1" ]]; then - CMAKE_ARGS="-DCCACHE_PROGRAM_PATH=$(which sccache) ${CMAKE_ARGS}" -fi - -echo "CC : ${CC}" -echo "CXX : ${CXX}" -echo "CUDAHOSTCXX : ${CUDAHOSTCXX}" -echo "CUDA : ${CUDA}" -echo "CMAKE_ARGS : ${CMAKE_ARGS}" - -echo "========Begin Env========" -env -echo "========End Env========" - -BUILD_DIR="build-conda" +# Append to front of args to allow users to overwrite them +if [[ -n "${MORPHEUS_CACHE_DIR}" ]]; then + # Set the cache variable, then set the Staging prefix to allow for host searching + CMAKE_ARGS="-DMORPHEUS_CACHE_DIR=${MORPHEUS_CACHE_DIR} ${CMAKE_ARGS}" -# Check if the build directory already exists. And if so, delete the -# CMakeCache.txt and CMakeFiles to ensure a clean configuration -if [[ -d "./${BUILD_DIR}" ]]; then - echo "Deleting old CMake files at ./${BUILD_DIR}" - rm -rf "./${BUILD_DIR}/CMakeCache.txt" - rm -rf "./${BUILD_DIR}/CMakeFiles" + # Double check that the cache dir has been created + mkdir -p ${MORPHEUS_CACHE_DIR} fi -# Run configure -cmake -B ${BUILD_DIR} \ - ${CMAKE_ARGS} \ - --log-level=verbose \ - . 
- -# Build the components -cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install - -# Install just the mprpheus core python wheel components -${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus/dist/*.whl diff --git a/ci/conda/recipes/morpheus-core/conda_build_config.yaml b/ci/conda/recipes/morpheus-libs/conda_build_config.yaml similarity index 100% rename from ci/conda/recipes/morpheus-core/conda_build_config.yaml rename to ci/conda/recipes/morpheus-libs/conda_build_config.yaml diff --git a/ci/conda/recipes/morpheus-core/meta.yaml b/ci/conda/recipes/morpheus-libs/meta.yaml similarity index 63% rename from ci/conda/recipes/morpheus-core/meta.yaml rename to ci/conda/recipes/morpheus-libs/meta.yaml index f72004b95a..17db27a5dd 100644 --- a/ci/conda/recipes/morpheus-core/meta.yaml +++ b/ci/conda/recipes/morpheus-libs/meta.yaml @@ -13,19 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +# This recipe splits into packages - morpheus-core, morpheus-dfp and morpheus-llm {% set version = environ.get('GIT_VERSION', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} package: - name: morpheus-core + name: morpheus-split version: {{ version }} source: git_url: ../../../.. 
outputs: - + ############################### morpheus-core ############################# - name: morpheus-core + type: conda_v2 build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda_{{ cuda_compiler_version }}_py{{ python }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} @@ -35,8 +37,6 @@ outputs: - CMAKE_CUDA_ARCHITECTURES - MORPHEUS_CACHE_DIR - MORPHEUS_PYTHON_BUILD_STUBS - - MORPHEUS_SUPPORT_DOCA - - MORPHEUS_BUILD_MORPHEUS_LLM - PARALLEL_LEVEL run_exports: - {{ pin_subpackage("morpheus-core", max_pin="x.x") }} @@ -47,14 +47,12 @@ outputs: - {{ compiler("c") }} - {{ compiler("cuda") }} - {{ compiler("cxx") }} - - automake =1.16.5 # Needed for DOCA build - ccache - cmake =3.27 - cuda-cudart-dev {{ cuda_compiler_version }}.* # Needed by CMake to compile a test application - cuda-version {{ cuda_compiler_version }}.* - - libtool # Needed for DOCA build - ninja =1.11 - - pkg-config =0.29 # for mrc cmake + - pkg-config =0.29 - sysroot_linux-64 =2.17 host: # CUDA dependencies @@ -130,11 +128,82 @@ outputs: imports: - morpheus commands: - - echo # make sure test requirements get installed + - echo # pytest will be added post re-factoring + + ############################### morpheus-dfp ############################# + - name: morpheus-dfp + type: conda_v2 + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda_{{ cuda_compiler_version }}_py{{ python }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - CMAKE_CUDA_ARCHITECTURES + - MORPHEUS_CACHE_DIR + - MORPHEUS_PYTHON_BUILD_STUBS + - PARALLEL_LEVEL + run_exports: + - {{ pin_subpackage("morpheus-dfp", max_pin="x.x") }} + script: morpheus_dfp_build.sh + + requirements: + build: + - ccache + - cmake =3.27 + host: + - {{ pin_subpackage('morpheus-core', exact=True) }} + - pip + - python {{ python }} + - scikit-build 0.17.6 + - versioneer-518 + run: + - {{ pin_subpackage('morpheus-core', exact=True) }} + + #test: Tests will be added post test refactoring + + ############################### 
morpheus-llm ############################# + - name: morpheus-llm + type: conda_v2 + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda_{{ cuda_compiler_version }}_py{{ python }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - CMAKE_CUDA_ARCHITECTURES + - MORPHEUS_CACHE_DIR + - MORPHEUS_PYTHON_BUILD_STUBS + - PARALLEL_LEVEL + run_exports: + - {{ pin_subpackage("morpheus-llm", max_pin="x.x") }} + script: morpheus_llm_build.sh + + requirements: + build: + - {{ compiler("c") }} + - {{ compiler("cxx") }} + - ccache + - cmake =3.27 + - ninja =1.11 + - pkg-config =0.29 + host: + # morpheus-core has to be at the top. changing that order will result in different + # package versions getting installed creating unexpected version conflicts. + - {{ pin_subpackage('morpheus-core', exact=True) }} + - cython 3.0.* + - glog 0.6.* + - pip + - pybind11-stubgen 0.10.5 + - python {{ python }} + - rapidjson 1.1.0 + - scikit-build 0.17.6 + - versioneer-518 + - zlib 1.2.13 # required to build triton client + run: + - {{ pin_subpackage('morpheus-core', exact=True) }} + #test: Tests will be added post test refactoring + about: home: https://github.com/nv-morpheus/Morpheus license: Apache-2.0 license_family: Apache license_file: LICENSE - summary: Morpheus Cybersecurity Core Library + summary: Morpheus Cybersecurity Library diff --git a/ci/conda/recipes/morpheus-libs/morpheus_core_build.sh b/ci/conda/recipes/morpheus-libs/morpheus_core_build.sh new file mode 100644 index 0000000000..6f9caf3314 --- /dev/null +++ b/ci/conda/recipes/morpheus-libs/morpheus_core_build.sh @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# It is assumed that this script is executed from the root of the repo directory by conda-build +# (https://conda-forge.org/docs/maintainer/knowledge_base.html#using-cmake) + +# This will store all of the cmake args. Make sure to prepend args to allow +# incoming values to overwrite them + +source $RECIPE_DIR/cmake_common.sh + +CMAKE_ARGS=${CMAKE_ARGS:-""} + +export CCACHE_BASEDIR=$(realpath ${SRC_DIR}/..) +export USE_SCCACHE=${USE_SCCACHE:-""} + +if [[ -n "${MORPHEUS_CACHE_DIR}" ]]; then + # Set the cache variable, then set the Staging prefix to allow for host searching + CMAKE_ARGS="-DMORPHEUS_CACHE_DIR=${MORPHEUS_CACHE_DIR} ${CMAKE_ARGS}" + + # Double check that the cache dir has been created + mkdir -p ${MORPHEUS_CACHE_DIR} +fi + +# Enable core. 
Core is enabled by default and this is to just highlight that it is on +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_CORE=ON ${CMAKE_ARGS}" + +# Disable dfp, llm and doca +CMAKE_ARGS="-DMORPHEUS_SUPPORT_DOCA=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_DFP=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=OFF ${CMAKE_ARGS}" + +if [[ "${USE_SCCACHE}" == "1" ]]; then + CMAKE_ARGS="-DCCACHE_PROGRAM_PATH=$(which sccache) ${CMAKE_ARGS}" +fi + +echo "CC : ${CC}" +echo "CXX : ${CXX}" +echo "CUDAHOSTCXX : ${CUDAHOSTCXX}" +echo "CUDA : ${CUDA}" +echo "CMAKE_ARGS : ${CMAKE_ARGS}" + +echo "========Begin Env========" +env +echo "========End Env========" + +BUILD_DIR="build-conda-core" + +# remove the old build directory +if [[ -d "./${BUILD_DIR}" ]]; then + echo "Deleting old build dir at ./${BUILD_DIR}" + rm -rf "./${BUILD_DIR}/" +fi + +# Run configure +cmake -B ${BUILD_DIR} \ + ${CMAKE_ARGS} \ + --log-level=verbose \ + . + +# Build the components +cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install + +# Install just the morpheus core python wheel components +${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus/dist/*.whl diff --git a/ci/conda/recipes/morpheus-libs/morpheus_dfp_build.sh b/ci/conda/recipes/morpheus-libs/morpheus_dfp_build.sh new file mode 100644 index 0000000000..cc837f80b9 --- /dev/null +++ b/ci/conda/recipes/morpheus-libs/morpheus_dfp_build.sh @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# It is assumed that this script is executed from the root of the repo directory by conda-build +# (https://conda-forge.org/docs/maintainer/knowledge_base.html#using-cmake) + +# This will store all of the cmake args. Make sure to prepend args to allow +# incoming values to overwrite them + +source $RECIPE_DIR/cmake_common.sh + +CMAKE_ARGS=${CMAKE_ARGS:-""} + +export CCACHE_BASEDIR=$(realpath ${SRC_DIR}/..) +export USE_SCCACHE=${USE_SCCACHE:-""} + +# Enable DFP +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_DFP=ON ${CMAKE_ARGS}" + +# Disable core, llm and doca +CMAKE_ARGS="-DMORPHEUS_SUPPORT_DOCA=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_CORE=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=OFF ${CMAKE_ARGS}" + + +if [[ "${USE_SCCACHE}" == "1" ]]; then + CMAKE_ARGS="-DCCACHE_PROGRAM_PATH=$(which sccache) ${CMAKE_ARGS}" +fi + +BUILD_DIR="build-conda-dfp" + +# remove the old build directory +if [[ -d "./${BUILD_DIR}" ]]; then + echo "Deleting old build dir at ./${BUILD_DIR}" + rm -rf "./${BUILD_DIR}/" +fi + +# Run configure +cmake -B ${BUILD_DIR} \ + ${CMAKE_ARGS} \ + --log-level=verbose \ + . 
+ +# Build the components +cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install + +# Install the morpheus dfp python wheel components +${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus_dfp/dist/*.whl diff --git a/ci/conda/recipes/morpheus-libs/morpheus_llm_build.sh b/ci/conda/recipes/morpheus-libs/morpheus_llm_build.sh new file mode 100644 index 0000000000..d29ea0e396 --- /dev/null +++ b/ci/conda/recipes/morpheus-libs/morpheus_llm_build.sh @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# It is assumed that this script is executed from the root of the repo directory by conda-build +# (https://conda-forge.org/docs/maintainer/knowledge_base.html#using-cmake) + +# This will store all of the cmake args. Make sure to prepend args to allow +# incoming values to overwrite them + +source $RECIPE_DIR/cmake_common.sh + +CMAKE_ARGS=${CMAKE_ARGS:-""} + +export CCACHE_BASEDIR=$(realpath ${SRC_DIR}/..) 
+export USE_SCCACHE=${USE_SCCACHE:-""} + +# Enable llm +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=ON ${CMAKE_ARGS}" + +# Disable core, dfp and doca +CMAKE_ARGS="-DMORPHEUS_SUPPORT_DOCA=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_CORE=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_DFP=OFF ${CMAKE_ARGS}" + +if [[ "${USE_SCCACHE}" == "1" ]]; then + CMAKE_ARGS="-DCCACHE_PROGRAM_PATH=$(which sccache) ${CMAKE_ARGS}" +fi + +BUILD_DIR="build-conda-llm" + +# remove the old build directory +if [[ -d "./${BUILD_DIR}" ]]; then + echo "Deleting old build dir at ./${BUILD_DIR}" + rm -rf "./${BUILD_DIR}/" +fi + + +# Run configure +cmake -B ${BUILD_DIR} \ + ${CMAKE_ARGS} \ + --log-level=verbose \ + . + +# Build the components +cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install + +# Install just the morpheus llm python wheel components +${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus_llm/dist/*.whl diff --git a/ci/conda/recipes/morpheus/meta.yaml b/ci/conda/recipes/morpheus/meta.yaml index 00b15fe488..fc55e54f30 100644 --- a/ci/conda/recipes/morpheus/meta.yaml +++ b/ci/conda/recipes/morpheus/meta.yaml @@ -36,7 +36,6 @@ outputs: - MORPHEUS_CACHE_DIR - MORPHEUS_PYTHON_BUILD_STUBS - MORPHEUS_SUPPORT_DOCA - - MORPHEUS_BUILD_MORPHEUS_LLM - PARALLEL_LEVEL run_exports: - {{ pin_subpackage("morpheus", max_pin="x.x") }} diff --git a/ci/conda/recipes/morpheus/morpheus_build.sh b/ci/conda/recipes/morpheus/morpheus_build.sh index 723559b4af..ad5771566c 100644 --- a/ci/conda/recipes/morpheus/morpheus_build.sh +++ b/ci/conda/recipes/morpheus/morpheus_build.sh @@ -18,7 +18,6 @@ # Need to ensure this value is set before checking it in the if block MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF} -MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-OFF} # This will store all of the cmake args. 
Make sure to prepend args to allow # incoming values to overwrite them @@ -45,9 +44,10 @@ if [[ ${MORPHEUS_SUPPORT_DOCA} == @(TRUE|ON) ]]; then echo "MORPHEUS_SUPPORT_DOCA is ON. Setting CMAKE_CUDA_ARCHITECTURES to supported values: '${CMAKE_CUDA_ARCHITECTURES}'" fi -if [[ ${MORPHEUS_BUILD_MORPHEUS_LLM} == @(TRUE|ON) ]]; then - CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=ON ${CMAKE_ARGS}" -fi +# enable all functional blocks +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_CORE=ON ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=ON ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_DFP=ON ${CMAKE_ARGS}" CMAKE_ARGS="-DCMAKE_MESSAGE_CONTEXT_SHOW=ON ${CMAKE_ARGS}" CMAKE_ARGS="-DCMAKE_INSTALL_PREFIX=$PREFIX ${CMAKE_ARGS}" @@ -100,3 +100,4 @@ cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install # Install just the python wheel components ${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus/dist/*.whl ${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus_llm/dist/*.whl +${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus_dfp/dist/*.whl diff --git a/ci/conda/recipes/run_conda_build.sh b/ci/conda/recipes/run_conda_build.sh index 3f96d70736..20eb9092ea 100755 --- a/ci/conda/recipes/run_conda_build.sh +++ b/ci/conda/recipes/run_conda_build.sh @@ -126,7 +126,7 @@ CONDA_ARGS_ARRAY+=("-c" "${CONDA_CHANNEL_ALIAS:+"${CONDA_CHANNEL_ALIAS%/}/"}defa if [[ ${NUMARGS} == 0 ]]; then echo -e "${r}ERROR: No arguments were provided. Please provide at least one package to build. 
Available packages:${x}" echo -e "${r} morpheus${x}" - echo -e "${r} morpheus-core${x}" + echo -e "${r} morpheus-libs${x}" echo -e "${r} pydebug${x}" echo -e "${r}Exiting...${x}" exit 12 @@ -134,7 +134,6 @@ fi if hasArg morpheus; then export MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF} - export MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-ON} # Set GIT_VERSION to set the project version inside of meta.yaml export GIT_VERSION="$(get_version)" @@ -144,15 +143,13 @@ if hasArg morpheus; then set +x fi -if hasArg morpheus-core; then - export MORPHEUS_SUPPORT_DOCA=-OFF - export MORPHEUS_BUILD_MORPHEUS_LLM=-OFF +if hasArg morpheus-libs; then # Set GIT_VERSION to set the project version inside of meta.yaml export GIT_VERSION="$(get_version)" - echo "Running conda-build for morpheus-core v${GIT_VERSION}..." + echo "Running conda-build for morpheus libraries v${GIT_VERSION}..." set -x - conda ${CONDA_COMMAND} "${CONDA_ARGS_ARRAY[@]}" ${CONDA_ARGS} ci/conda/recipes/morpheus-core + conda ${CONDA_COMMAND} "${CONDA_ARGS_ARRAY[@]}" ${CONDA_ARGS} ci/conda/recipes/morpheus-libs set +x fi diff --git a/ci/runner/Dockerfile b/ci/runner/Dockerfile index ed150d0657..40b035c402 100644 --- a/ci/runner/Dockerfile +++ b/ci/runner/Dockerfile @@ -60,7 +60,6 @@ RUN rapids-dependency-file-generator \ rm -rf /tmp/conda ENV MORPHEUS_SUPPORT_DOCA=ON -ENV MORPHEUS_BUILD_MORPHEUS_LLM=ON COPY ./.devcontainer/docker/optional_deps/doca.sh /tmp/doca/ diff --git a/ci/scripts/github/build.sh b/ci/scripts/github/build.sh index 7babe4a40a..8f75893967 100755 --- a/ci/scripts/github/build.sh +++ b/ci/scripts/github/build.sh @@ -42,7 +42,7 @@ cmake --build ${BUILD_DIR} --parallel ${PARALLEL_LEVEL} log_sccache_stats rapids-logger "Archiving results" -tar cfj "${WORKSPACE_TMP}/wheel.tar.bz" ${BUILD_DIR}/python/morpheus/dist ${BUILD_DIR}/python/morpheus_llm/dist +tar cfj "${WORKSPACE_TMP}/wheel.tar.bz" ${BUILD_DIR}/python/morpheus/dist ${BUILD_DIR}/python/morpheus_llm/dist 
${BUILD_DIR}/python/morpheus_dfp/dist MORPHEUS_LIBS=($(find ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus/morpheus/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;) \ $(find ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus_llm/morpheus_llm/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;) \ diff --git a/ci/scripts/github/checks.sh b/ci/scripts/github/checks.sh index 6b6e579feb..a00afe718f 100755 --- a/ci/scripts/github/checks.sh +++ b/ci/scripts/github/checks.sh @@ -50,6 +50,7 @@ log_sccache_stats rapids-logger "Installing Morpheus" pip install ./python/morpheus pip install ./python/morpheus_llm +pip install ./python/morpheus_dfp rapids-logger "Checking copyright headers" python ${MORPHEUS_ROOT}/ci/scripts/copyright.py --verify-apache-v2 --git-diff-commits ${CHANGE_TARGET} ${GIT_COMMIT} diff --git a/ci/scripts/github/cmake_all.sh b/ci/scripts/github/cmake_all.sh index 86b0d65dcd..0e378a505a 100644 --- a/ci/scripts/github/cmake_all.sh +++ b/ci/scripts/github/cmake_all.sh @@ -27,6 +27,7 @@ _FLAGS+=("-DMORPHEUS_BUILD_BENCHMARKS=ON") _FLAGS+=("-DMORPHEUS_BUILD_EXAMPLES=ON") _FLAGS+=("-DMORPHEUS_BUILD_TESTS=ON") _FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_LLM=ON") +_FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_DFP=ON") if [[ "${LOCAL_CI}" == "" ]]; then _FLAGS+=("-DCCACHE_PROGRAM_PATH=$(which sccache)") fi diff --git a/ci/scripts/github/cmake_core.sh b/ci/scripts/github/cmake_core.sh deleted file mode 100644 index 20d250e45c..0000000000 --- a/ci/scripts/github/cmake_core.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -_FLAGS=() -_FLAGS+=("-B" "${BUILD_DIR}") -_FLAGS+=("-G" "Ninja") -_FLAGS+=("-DCMAKE_MESSAGE_CONTEXT_SHOW=ON") -_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=RAPIDS") -_FLAGS+=("-DMORPHEUS_USE_CONDA=ON") -_FLAGS+=("-DMORPHEUS_USE_CCACHE=ON") -_FLAGS+=("-DMORPHEUS_PYTHON_INPLACE_BUILD=OFF") -_FLAGS+=("-DMORPHEUS_PYTHON_BUILD_STUBS=ON") -_FLAGS+=("-DMORPHEUS_BUILD_BENCHMARKS=OFF") -_FLAGS+=("-DMORPHEUS_BUILD_EXAMPLES=OFF") -_FLAGS+=("-DMORPHEUS_BUILD_TESTS=OFF") -_FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_LLM=OFF") -_FLAGS+=("-DMORPHEUS_SUPPORT_DOCA=OFF") -if [[ "${LOCAL_CI}" == "" ]]; then - _FLAGS+=("-DCCACHE_PROGRAM_PATH=$(which sccache)") -fi -export CMAKE_BUILD_ALL_FEATURES="${_FLAGS[@]}" -unset _FLAGS diff --git a/ci/scripts/github/conda_core.sh b/ci/scripts/github/conda_libs.sh similarity index 81% rename from ci/scripts/github/conda_core.sh rename to ci/scripts/github/conda_libs.sh index b2006b6162..7969a30c83 100755 --- a/ci/scripts/github/conda_core.sh +++ b/ci/scripts/github/conda_libs.sh @@ -18,7 +18,6 @@ set -e CI_SCRIPT_ARGS="$@" source ${WORKSPACE}/ci/scripts/github/common.sh -source ${WORKSPACE}/ci/scripts/github/cmake_core.sh cd ${MORPHEUS_ROOT} @@ -42,21 +41,21 @@ fi # Print the info just to be sure base is active conda info -rapids-logger "Building Conda Package morpheus-core" +rapids-logger "Building Morpheus Libraries" # Run the conda build, and upload to conda forge if requested export MORPHEUS_PYTHON_BUILD_STUBS=OFF export CONDA_ARGS="--skip-existing" -${MORPHEUS_ROOT}/ci/conda/recipes/run_conda_build.sh morpheus-core "${CI_SCRIPT_ARGS}" 
+${MORPHEUS_ROOT}/ci/conda/recipes/run_conda_build.sh morpheus-libs "${CI_SCRIPT_ARGS}" # If we didn't receive the upload argument, upload the artifact to S3 if [[ " ${CI_SCRIPT_ARGS} " =~ " upload " ]]; then - rapids-logger "Building Conda Package morpheus-core... Done" + rapids-logger "Building Morpheus Libraries... Done" else # if we didn't receive the upload argument, we can still upload the artifact to S3 - tar cfj "${WORKSPACE_TMP}/conda_morpheus_core.tar.bz" "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" + tar cfj "${WORKSPACE_TMP}/conda_libs.tar.bz" "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" ls -lh ${WORKSPACE_TMP}/ rapids-logger "Pushing results to ${DISPLAY_ARTIFACT_URL}/" - upload_artifact "${WORKSPACE_TMP}/conda_morpheus_core.tar.bz" + upload_artifact "${WORKSPACE_TMP}/conda_libs.tar.bz" fi diff --git a/ci/scripts/github/docs.sh b/ci/scripts/github/docs.sh index a0e2057fe8..441c8495b0 100755 --- a/ci/scripts/github/docs.sh +++ b/ci/scripts/github/docs.sh @@ -32,6 +32,7 @@ tar xf "${WORKSPACE_TMP}/wheel.tar.bz" pip install ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus/dist/*.whl pip install ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus_llm/dist/*.whl +pip install ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus_dfp/dist/*.whl rapids-logger "Pulling LFS assets" cd ${MORPHEUS_ROOT} diff --git a/ci/scripts/run_ci_local.sh b/ci/scripts/run_ci_local.sh index 8f7df827da..ce4311b9a6 100755 --- a/ci/scripts/run_ci_local.sh +++ b/ci/scripts/run_ci_local.sh @@ -21,13 +21,13 @@ case "$1" in STAGES=("bash") ;; "all" ) - STAGES=("checks" "build" "docs" "test" "conda_core" "conda") + STAGES=("checks" "build" "docs" "test" "conda_libs" "conda") ;; - "checks" | "build" | "docs" | "test" | "conda_core" | "conda" | "bash" ) + "checks" | "build" | "docs" | "test" | "conda_libs" | "conda" | "bash" ) STAGES=("$1") ;; * ) - echo "Error: Invalid argument \"$1\" provided. 
Expected values: \"all\", \"checks\", \"build\", \"docs\", \"test\", \"conda_core\", \"conda\", or \"bash\"" + echo "Error: Invalid argument \"$1\" provided. Expected values: \"all\", \"checks\", \"build\", \"docs\", \"test\", \"conda_libs\", \"conda\", or \"bash\"" exit 1 ;; esac diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 2b59d30593..db88a38401 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -75,36 +75,13 @@ if(MORPHEUS_BUILD_TESTS) ) endif() -# cccl -- get an explicit cccl build, matx tries to pull a tag that doesn't exist. -# ========= -morpheus_utils_configure_cccl() - -# matx -# ==== -morpheus_utils_configure_matx() - -# pybind11 -# ========= -morpheus_utils_configure_pybind11() - -# RD-Kafka -# ===== -morpheus_utils_configure_rdkafka() - -# RxCpp -# ===== -morpheus_utils_configure_rxcpp() - -# MRC (Should come after all third party but before NVIDIA repos) -# ===== -morpheus_utils_configure_mrc() - -# CuDF -# ===== -morpheus_utils_configure_cudf() +# Include dependencies based on components being built +if(MORPHEUS_BUILD_MORPHEUS_CORE) + include(dependencies_core) +endif() -# Triton-client -# ===== -morpheus_utils_configure_tritonclient() +if(MORPHEUS_BUILD_MORPHEUS_LLM) + include(dependencies_llm) +endif() list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/cmake/dependencies_core.cmake b/cmake/dependencies_core.cmake new file mode 100644 index 0000000000..b8c5457a69 --- /dev/null +++ b/cmake/dependencies_core.cmake @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +list(APPEND CMAKE_MESSAGE_CONTEXT "dep_core") + +# cccl -- get an explicit cccl build, matx tries to pull a tag that doesn't exist. +# ========= +morpheus_utils_configure_cccl() + +# matx +# ==== +morpheus_utils_configure_matx() + +# pybind11 +# ========= +morpheus_utils_configure_pybind11() + +# RD-Kafka +# ===== +morpheus_utils_configure_rdkafka() + +# RxCpp +# ===== +morpheus_utils_configure_rxcpp() + +# MRC (Should come after all third party but before NVIDIA repos) +# ===== +morpheus_utils_configure_mrc() + +# CuDF +# ===== +morpheus_utils_configure_cudf() + +# Triton-client +# ===== +morpheus_utils_configure_tritonclient() + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/cmake/dependencies_llm.cmake b/cmake/dependencies_llm.cmake new file mode 100644 index 0000000000..fc1f062ec9 --- /dev/null +++ b/cmake/dependencies_llm.cmake @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +list(APPEND CMAKE_MESSAGE_CONTEXT "dep_llm") + +# pybind11 +# ========= +morpheus_utils_configure_pybind11() + +# RD-Kafka +# ===== +morpheus_utils_configure_rdkafka() + +# MRC (Should come after all third party but before NVIDIA repos) +# ===== +morpheus_utils_configure_mrc() + +# CuDF +morpheus_utils_configure_cudf() + +# Triton-client +# ===== +morpheus_utils_configure_tritonclient() + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/docker/build_container.sh b/docker/build_container.sh index ca7f7cad78..85877583f7 100755 --- a/docker/build_container.sh +++ b/docker/build_container.sh @@ -36,6 +36,7 @@ LINUX_DISTRO=${LINUX_DISTRO:-ubuntu} LINUX_VER=${LINUX_VER:-22.04} MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-"OFF"} MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-"ON"} +MORPHEUS_BUILD_MORPHEUS_DFP=${MORPHEUS_BUILD_MORPHEUS_DFP:-"ON"} PYTHON_VER=${PYTHON_VER:-3.10} # Determine the relative path from $PWD to $MORPHEUS_ROOT @@ -53,6 +54,7 @@ DOCKER_ARGS="${DOCKER_ARGS} --build-arg LINUX_VER=${LINUX_VER}" DOCKER_ARGS="${DOCKER_ARGS} --build-arg MORPHEUS_ROOT_HOST=${MORPHEUS_ROOT_HOST}" DOCKER_ARGS="${DOCKER_ARGS} --build-arg MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA}" DOCKER_ARGS="${DOCKER_ARGS} --build-arg MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM}" +DOCKER_ARGS="${DOCKER_ARGS} --build-arg MORPHEUS_BUILD_MORPHEUS_DFP=${MORPHEUS_BUILD_MORPHEUS_DFP}" DOCKER_ARGS="${DOCKER_ARGS} --build-arg PYTHON_VER=${PYTHON_VER}" DOCKER_ARGS="${DOCKER_ARGS} --network=host" diff --git a/docker/run_container_release.sh b/docker/run_container_release.sh index a97102380b..5882b4115e 100755 --- a/docker/run_container_release.sh +++ b/docker/run_container_release.sh @@ -29,6 +29,7 @@ pushd ${SCRIPT_DIR} &> /dev/null MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF} MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-ON} +MORPHEUS_BUILD_MORPHEUS_DFP=${MORPHEUS_BUILD_MORPHEUS_DFP:-ON} 
DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-"nvcr.io/nvidia/morpheus/morpheus"} DOCKER_IMAGE_TAG=${DOCKER_IMAGE_TAG:-"$(git describe --tags --abbrev=0)-runtime"} diff --git a/docs/source/developer_guide/contributing.md b/docs/source/developer_guide/contributing.md index 22bca9fdd0..ea1ecbaa34 100644 --- a/docs/source/developer_guide/contributing.md +++ b/docs/source/developer_guide/contributing.md @@ -191,6 +191,7 @@ git submodule update --init --recursive ```bash pip install -e ${MORPHEUS_ROOT}/python/morpheus pip install -e ${MORPHEUS_ROOT}/python/morpheus_llm + pip install -e ${MORPHEUS_ROOT}/python/morpheus_dfp ``` Once Morpheus has been built, it can be installed into the current virtual environment. 1. Test the build (Note: some tests will be skipped)\ diff --git a/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md b/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md index fd96515309..b9a2e3a786 100644 --- a/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md +++ b/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md @@ -209,7 +209,7 @@ For input files containing an ISO 8601 formatted date string the `iso_date_regex from functools import partial from morpheus.utils.file_utils import date_extractor -from dfp.utils.regex_utils import iso_date_regex +from morpheus_dfp.utils.regex_utils import iso_date_regex ``` ```python # Batch files into buckets by time. 
Use the default ISO date extractor from the filename diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py b/examples/digital_fingerprinting/production/dfp_azure_pipeline.py similarity index 96% rename from examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py rename to examples/digital_fingerprinting/production/dfp_azure_pipeline.py index edee62d68b..d470217b83 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py +++ b/examples/digital_fingerprinting/production/dfp_azure_pipeline.py @@ -24,17 +24,6 @@ import click import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -58,6 +47,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_postprocessing_stage import 
DFPPostprocessingStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex def _file_type_name_to_enum(file_type: str) -> FileTypes: diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py b/examples/digital_fingerprinting/production/dfp_duo_pipeline.py similarity index 95% rename from examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py rename to examples/digital_fingerprinting/production/dfp_duo_pipeline.py index 10fc7592ad..2cd08bfb7b 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py +++ b/examples/digital_fingerprinting/production/dfp_duo_pipeline.py @@ -24,17 +24,6 @@ import click import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -59,6 +48,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import 
load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex def _file_type_name_to_enum(file_type: str) -> FileTypes: diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py b/examples/digital_fingerprinting/production/dfp_integrated_training_batch_pipeline.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py rename to examples/digital_fingerprinting/production/dfp_integrated_training_batch_pipeline.py index fe3d99f187..7782961760 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py +++ b/examples/digital_fingerprinting/production/dfp_integrated_training_batch_pipeline.py @@ -17,23 +17,23 @@ from datetime import datetime import click -# When segment modules are imported, they're added to the module registry. 
-# To avoid flake8 warnings about unused code, the noqa flag is used during import -import dfp.modules # noqa: F401 # pylint:disable=unused-import -from dfp.utils.config_generator import ConfigGenerator -from dfp.utils.config_generator import generate_ae_config -from dfp.utils.dfp_arg_parser import DFPArgParser -from dfp.utils.schema_utils import Schema -from dfp.utils.schema_utils import SchemaBuilder import morpheus.loaders # noqa: F401 # pylint:disable=unused-import import morpheus.modules # noqa: F401 # pylint:disable=unused-import +# When segment modules are imported, they're added to the module registry. +# To avoid flake8 warnings about unused code, the noqa flag is used during import +import morpheus_dfp.modules # noqa: F401 # pylint:disable=unused-import from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import parse_log_level from morpheus.config import Config from morpheus.pipeline.pipeline import Pipeline from morpheus.stages.general.multi_port_modules_stage import MultiPortModulesStage from morpheus.stages.input.control_message_file_source_stage import ControlMessageFileSourceStage +from morpheus_dfp.utils.config_generator import ConfigGenerator +from morpheus_dfp.utils.config_generator import generate_ae_config +from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser +from morpheus_dfp.utils.schema_utils import Schema +from morpheus_dfp.utils.schema_utils import SchemaBuilder @click.command() diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py b/examples/digital_fingerprinting/production/dfp_integrated_training_streaming_pipeline.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py rename to examples/digital_fingerprinting/production/dfp_integrated_training_streaming_pipeline.py index 93df5c3e53..198bfa528d 100644 --- 
a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py +++ b/examples/digital_fingerprinting/production/dfp_integrated_training_streaming_pipeline.py @@ -17,23 +17,23 @@ from datetime import datetime import click -# When segment modules are imported, they're added to the module registry. -# To avoid flake8 warnings about unused code, the noqa flag is used during import. -import dfp.modules # noqa: F401 # pylint:disable=unused-import -from dfp.utils.config_generator import ConfigGenerator -from dfp.utils.config_generator import generate_ae_config -from dfp.utils.dfp_arg_parser import DFPArgParser -from dfp.utils.schema_utils import Schema -from dfp.utils.schema_utils import SchemaBuilder import morpheus.loaders # noqa: F401 # pylint:disable=unused-import import morpheus.modules # noqa: F401 # pylint:disable=unused-import +# When segment modules are imported, they're added to the module registry. +# To avoid flake8 warnings about unused code, the noqa flag is used during import. 
+import morpheus_dfp.modules # noqa: F401 # pylint:disable=unused-import from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import parse_log_level from morpheus.config import Config from morpheus.pipeline.pipeline import Pipeline from morpheus.stages.general.multi_port_modules_stage import MultiPortModulesStage from morpheus.stages.input.control_message_kafka_source_stage import ControlMessageKafkaSourceStage +from morpheus_dfp.utils.config_generator import ConfigGenerator +from morpheus_dfp.utils.config_generator import generate_ae_config +from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser +from morpheus_dfp.utils.schema_utils import Schema +from morpheus_dfp.utils.schema_utils import SchemaBuilder @click.command() diff --git a/examples/digital_fingerprinting/production/grafana/run.py b/examples/digital_fingerprinting/production/grafana/run.py index c64e156a4d..c62c0de1c6 100644 --- a/examples/digital_fingerprinting/production/grafana/run.py +++ b/examples/digital_fingerprinting/production/grafana/run.py @@ -26,17 +26,6 @@ import logging_loki import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -60,6 +49,17 @@ from morpheus.utils.file_utils import date_extractor 
from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex def _file_type_name_to_enum(file_type: str) -> FileTypes: diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py index f455b3eea6..d8a1825b72 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py @@ -25,10 +25,11 @@ import mlflow import pandas as pd -from dfp.utils.config_generator import ConfigGenerator -from dfp.utils.config_generator import generate_ae_config -from dfp.utils.dfp_arg_parser import DFPArgParser -from dfp.utils.schema_utils import SchemaBuilder + +from morpheus_dfp.utils.config_generator import ConfigGenerator +from morpheus_dfp.utils.config_generator import generate_ae_config +from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser +from morpheus_dfp.utils.schema_utils import SchemaBuilder logger = logging.getLogger(f"morpheus.{__name__}") diff --git 
a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py index 9583821290..051e3b7f25 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py @@ -20,23 +20,11 @@ import typing import boto3 -import dfp.modules # noqa: F401 # pylint:disable=unused-import import pytest -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex -from dfp.utils.schema_utils import Schema import morpheus.loaders # noqa: F401 # pylint:disable=unused-import import morpheus.modules # noqa: F401 # pylint:disable=unused-import +import morpheus_dfp.modules # noqa: F401 # pylint:disable=unused-import from benchmarks.benchmark_conf_generator import BenchmarkConfGenerator from benchmarks.benchmark_conf_generator import load_json from benchmarks.benchmark_conf_generator import set_mlflow_tracking_uri @@ -53,6 +41,18 @@ from morpheus.utils.column_info import DataFrameInputSchema from morpheus.utils.file_utils import date_extractor from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from 
morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex +from morpheus_dfp.utils.schema_utils import Schema logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb index 54b004d4d5..7047b9003c 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb @@ -47,15 +47,15 @@ "from datetime import datetime\n", "from functools import partial\n", "\n", - "from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", - "from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", - "from dfp.stages.dfp_inference_stage import DFPInferenceStage\n", - "from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage\n", - "from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", - "from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", - "from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", - "from dfp.stages.multi_file_source import MultiFileSource\n", - "from dfp.utils.regex_utils import iso_date_regex\n", + "from 
morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", + "from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", + "from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage\n", + "from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage\n", + "from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", + "from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", + "from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", + "from morpheus_dfp.stages.multi_file_source import MultiFileSource\n", + "from morpheus_dfp.utils.regex_utils import iso_date_regex\n", "\n", "from morpheus.common import FileTypes\n", "from morpheus.common import FilterSource\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_integrated_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_integrated_training.ipynb index 1ea01f0a87..3377fb2158 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_integrated_training.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_integrated_training.ipynb @@ -45,12 +45,12 @@ "\n", "# When segment modules are imported, they're added to the module registry.\n", "# To avoid flake8 warnings about unused code, the noqa flag is used during import.\n", - "import dfp.modules # noqa: F401\n", - "from dfp.utils.config_generator import ConfigGenerator\n", - "from dfp.utils.config_generator import generate_ae_config\n", - "from dfp.utils.dfp_arg_parser import DFPArgParser\n", - "from dfp.utils.schema_utils import Schema\n", - "from dfp.utils.schema_utils import SchemaBuilder\n", + "import morpheus_dfp.modules # noqa: F401\n", + "from morpheus_dfp.utils.config_generator import ConfigGenerator\n", + "from morpheus_dfp.utils.config_generator import generate_ae_config\n", + "from 
morpheus_dfp.utils.dfp_arg_parser import DFPArgParser\n", + "from morpheus_dfp.utils.schema_utils import Schema\n", + "from morpheus_dfp.utils.schema_utils import SchemaBuilder\n", "\n", "import morpheus.loaders # noqa: F401\n", "import morpheus.modules # noqa: F401\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb index 0e471d9361..a30d892b5e 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb @@ -45,15 +45,15 @@ "import typing\n", "from datetime import datetime\n", "\n", - "from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", - "from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", - "from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage\n", - "from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", - "from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", - "from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", - "from dfp.stages.dfp_training import DFPTraining\n", - "from dfp.stages.multi_file_source import MultiFileSource\n", - "from dfp.utils.regex_utils import iso_date_regex\n", + "from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", + "from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", + "from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage\n", + "from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", + "from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", + "from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", + "from morpheus_dfp.stages.dfp_training import DFPTraining\n", + "from morpheus_dfp.stages.multi_file_source import 
MultiFileSource\n", + "from morpheus_dfp.utils.regex_utils import iso_date_regex\n", "\n", "from morpheus.common import FileTypes\n", "from morpheus.cli.utils import get_log_levels\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb index fe6982dcd9..c407b5caef 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb @@ -45,15 +45,15 @@ "import typing\n", "from datetime import datetime\n", "\n", - "from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", - "from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", - "from dfp.stages.dfp_inference_stage import DFPInferenceStage\n", - "from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage\n", - "from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", - "from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", - "from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", - "from dfp.stages.multi_file_source import MultiFileSource\n", - "from dfp.utils.regex_utils import iso_date_regex\n", + "from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", + "from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", + "from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage\n", + "from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage\n", + "from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", + "from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", + "from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", + "from morpheus_dfp.stages.multi_file_source import MultiFileSource\n", + "from morpheus_dfp.utils.regex_utils import 
iso_date_regex\n", "\n", "from morpheus.common import FileTypes\n", "from morpheus.common import FilterSource\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_integrated_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_integrated_training.ipynb index 64b2b92ef5..60fbd83b5b 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_integrated_training.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_integrated_training.ipynb @@ -45,14 +45,14 @@ "\n", "# When segment modules are imported, they're added to the module registry.\n", "# To avoid flake8 warnings about unused code, the noqa flag is used during import.\n", - "import dfp.modules # noqa: F401\n", + "import morpheus_dfp.modules # noqa: F401\n", "from morpheus import modules # noqa: F401\n", "from morpheus import loaders # noqa: F401\n", - "from dfp.utils.config_generator import ConfigGenerator\n", - "from dfp.utils.config_generator import generate_ae_config\n", - "from dfp.utils.dfp_arg_parser import DFPArgParser\n", - "from dfp.utils.schema_utils import Schema\n", - "from dfp.utils.schema_utils import SchemaBuilder\n", + "from morpheus_dfp.utils.config_generator import ConfigGenerator\n", + "from morpheus_dfp.utils.config_generator import generate_ae_config\n", + "from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser\n", + "from morpheus_dfp.utils.schema_utils import Schema\n", + "from morpheus_dfp.utils.schema_utils import SchemaBuilder\n", "\n", "import morpheus.loaders # noqa: F401\n", "import morpheus.modules # noqa: F401\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb index cacb071361..35a4fa02d5 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb +++ 
b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb @@ -45,15 +45,15 @@ "import typing\n", "from datetime import datetime\n", "\n", - "from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", - "from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", - "from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage\n", - "from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", - "from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", - "from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", - "from dfp.stages.dfp_training import DFPTraining\n", - "from dfp.stages.multi_file_source import MultiFileSource\n", - "from dfp.utils.regex_utils import iso_date_regex\n", + "from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", + "from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", + "from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage\n", + "from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", + "from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", + "from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", + "from morpheus_dfp.stages.dfp_training import DFPTraining\n", + "from morpheus_dfp.stages.multi_file_source import MultiFileSource\n", + "from morpheus_dfp.utils.regex_utils import iso_date_regex\n", "\n", "from morpheus.common import FileTypes\n", "from morpheus.cli.utils import get_log_levels\n", diff --git a/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py b/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py index eb8e863754..0143dcc6c6 100644 --- a/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py +++ b/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py @@ -23,17 +23,6 @@ import click import mlflow import 
pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -53,6 +42,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex @click.command() diff --git a/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py b/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py index 325db0f540..475e34e245 100644 --- 
a/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py +++ b/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py @@ -23,17 +23,6 @@ import click import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -54,6 +43,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex @click.command() diff 
--git a/python/CMakeLists.txt b/python/CMakeLists.txt index b41fabadf0..1a90518cbe 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -15,10 +15,16 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "python") -add_subdirectory(morpheus) +if (MORPHEUS_BUILD_MORPHEUS_CORE) + add_subdirectory(morpheus) +endif() if (MORPHEUS_BUILD_MORPHEUS_LLM) -add_subdirectory(morpheus_llm) + add_subdirectory(morpheus_llm) +endif() + +if (MORPHEUS_BUILD_MORPHEUS_DFP) + add_subdirectory(morpheus_dfp) endif() list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/python/morpheus/morpheus/stages/input/http_server_source_stage.py b/python/morpheus/morpheus/stages/input/http_server_source_stage.py index 4691b4578c..78fa889841 100644 --- a/python/morpheus/morpheus/stages/input/http_server_source_stage.py +++ b/python/morpheus/morpheus/stages/input/http_server_source_stage.py @@ -254,7 +254,6 @@ def _generate_frames(self, subscription: mrc.Subscription) -> typing.Iterator[Me or not subscription.is_subscribed()): self._processing = False else: - logger.debug("Queue empty, sleeping ...") time.sleep(self._sleep_time) except Closed: logger.error("Queue closed unexpectedly, shutting down") diff --git a/python/morpheus_dfp/CMakeLists.txt b/python/morpheus_dfp/CMakeLists.txt new file mode 100644 index 0000000000..fbb1afe6ad --- /dev/null +++ b/python/morpheus_dfp/CMakeLists.txt @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +list(APPEND CMAKE_MESSAGE_CONTEXT "morpheus_dfp") + +# Save the root of the python for relative paths +set(MORPHEUS_DFP_PY_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..) + + +# ##################################### +# #### Morpheus DFP Python Setup ###### +# ##################################### +morpheus_utils_python_configure() + +morpheus_utils_create_python_package(morpheus_dfp + PROJECT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/morpheus_dfp" +) + +if(MORPHEUS_PYTHON_INPLACE_BUILD) + list(APPEND extra_args "IS_INPLACE") +endif() + +if(MORPHEUS_PYTHON_BUILD_WHEEL) + list(APPEND extra_args "BUILD_WHEEL") +endif() + +if(MORPHEUS_PYTHON_PERFORM_INSTALL) + list(APPEND extra_args "INSTALL_WHEEL") +endif() + +morpheus_utils_build_python_package(morpheus_dfp ${extra_args}) + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/python/morpheus_dfp/MANIFEST.in b/python/morpheus_dfp/MANIFEST.in new file mode 100644 index 0000000000..807dc3f1b0 --- /dev/null +++ b/python/morpheus_dfp/MANIFEST.in @@ -0,0 +1,2 @@ +include morpheus_dfp/_version.py +recursive-include morpheus_dfp py.typed diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/__init__.py b/python/morpheus_dfp/morpheus_dfp/__init__.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/__init__.py rename to python/morpheus_dfp/morpheus_dfp/__init__.py diff --git a/python/morpheus_dfp/morpheus_dfp/_version.py b/python/morpheus_dfp/morpheus_dfp/_version.py new file mode 100644 index 0000000000..af416901c7 --- /dev/null +++ b/python/morpheus_dfp/morpheus_dfp/_version.py @@ -0,0 +1,685 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by GitHub's download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.22 (https://github.com/python-versioneer/python-versioneer) +"""Git implementation of _version.py.""" + +import errno +import functools +import os +import re +import subprocess +import sys +from typing import Callable +from typing import Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords().
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "morpheus-" + cfg.versionfile_source = "morpheus/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + 
print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands, )) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None + } + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return { + "version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. 
+ # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + MATCH_ARGS = ["--match", "%s*" % tag_prefix] if tag_prefix else [] + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", "--always", "--long", *MATCH_ARGS], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or None if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags.
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always --long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags.
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date") + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. 
+ + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None + } diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/__init__.py b/python/morpheus_dfp/morpheus_dfp/modules/__init__.py similarity index 68% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/__init__.py rename to python/morpheus_dfp/morpheus_dfp/modules/__init__.py index f274245601..cf1e5741ba 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/__init__.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/__init__.py @@ -17,16 +17,16 @@ # When segment modules are imported, they're added to the module registry. # To avoid flake8 warnings about unused code, the noqa flag is used during import. 
-from dfp.modules import dfp_data_prep -from dfp.modules import dfp_deployment -from dfp.modules import dfp_inference -from dfp.modules import dfp_inference_pipe -from dfp.modules import dfp_postprocessing -from dfp.modules import dfp_preproc -from dfp.modules import dfp_rolling_window -from dfp.modules import dfp_split_users -from dfp.modules import dfp_training -from dfp.modules import dfp_training_pipe +from morpheus_dfp.modules import dfp_data_prep +from morpheus_dfp.modules import dfp_deployment +from morpheus_dfp.modules import dfp_inference +from morpheus_dfp.modules import dfp_inference_pipe +from morpheus_dfp.modules import dfp_postprocessing +from morpheus_dfp.modules import dfp_preproc +from morpheus_dfp.modules import dfp_rolling_window +from morpheus_dfp.modules import dfp_split_users +from morpheus_dfp.modules import dfp_training +from morpheus_dfp.modules import dfp_training_pipe __all__ = [ "dfp_split_users", diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_data_prep.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_data_prep.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_data_prep.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_data_prep.py index 0a91a21b2e..dfaad30e87 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_data_prep.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_data_prep.py @@ -24,8 +24,7 @@ from morpheus.utils.column_info import process_dataframe from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_DATA_PREP +from morpheus_dfp.utils.module_ids import DFP_DATA_PREP logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_deployment.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py similarity index 98% rename 
from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_deployment.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py index e4744c32b6..05611fbca0 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_deployment.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py @@ -22,10 +22,9 @@ from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import merge_dictionaries from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_DEPLOYMENT -from ..utils.module_ids import DFP_INFERENCE_PIPE -from ..utils.module_ids import DFP_TRAINING_PIPE +from morpheus_dfp.utils.module_ids import DFP_DEPLOYMENT +from morpheus_dfp.utils.module_ids import DFP_INFERENCE_PIPE +from morpheus_dfp.utils.module_ids import DFP_TRAINING_PIPE logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py index 4ad65d501c..c710d09f9f 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py @@ -16,8 +16,6 @@ import time import mrc -from dfp.utils.model_cache import ModelCache -from dfp.utils.model_cache import ModelManager from mlflow.tracking.client import MlflowClient from mrc.core import operators as ops @@ -27,8 +25,9 @@ from morpheus.messages import MessageMeta from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_INFERENCE +from morpheus_dfp.utils.model_cache import ModelCache +from morpheus_dfp.utils.model_cache import 
ModelManager +from morpheus_dfp.utils.module_ids import DFP_INFERENCE logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference_pipe.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference_pipe.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py index 24f72c278a..26c36ca763 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference_pipe.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py @@ -23,13 +23,12 @@ from morpheus.utils.module_ids import WRITE_TO_FILE from morpheus.utils.module_utils import merge_dictionaries from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_DATA_PREP -from ..utils.module_ids import DFP_INFERENCE -from ..utils.module_ids import DFP_INFERENCE_PIPE -from ..utils.module_ids import DFP_POST_PROCESSING -from ..utils.module_ids import DFP_PREPROC -from ..utils.module_ids import DFP_ROLLING_WINDOW +from morpheus_dfp.utils.module_ids import DFP_DATA_PREP +from morpheus_dfp.utils.module_ids import DFP_INFERENCE +from morpheus_dfp.utils.module_ids import DFP_INFERENCE_PIPE +from morpheus_dfp.utils.module_ids import DFP_POST_PROCESSING +from morpheus_dfp.utils.module_ids import DFP_PREPROC +from morpheus_dfp.utils.module_ids import DFP_ROLLING_WINDOW logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_postprocessing.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_postprocessing.py index 908c0d61c5..3375fc40af 100644 --- 
a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_postprocessing.py @@ -22,8 +22,7 @@ from morpheus.messages import ControlMessage from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_POST_PROCESSING +from morpheus_dfp.utils.module_ids import DFP_POST_PROCESSING logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_preproc.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_preproc.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_preproc.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_preproc.py index 4dd89334dc..54f934495f 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_preproc.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_preproc.py @@ -24,9 +24,8 @@ from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import merge_dictionaries from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_PREPROC -from ..utils.module_ids import DFP_SPLIT_USERS +from morpheus_dfp.utils.module_ids import DFP_PREPROC +from morpheus_dfp.utils.module_ids import DFP_SPLIT_USERS logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_rolling_window.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_rolling_window.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py index bfdbe13e2c..ea9b1fede8 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_rolling_window.py +++ 
b/python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py @@ -19,8 +19,6 @@ import mrc import pandas as pd -from dfp.utils.cached_user_window import CachedUserWindow -from dfp.utils.logging_timer import log_time from mrc.core import operators as ops import cudf @@ -29,8 +27,9 @@ from morpheus.messages import MessageMeta from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_ROLLING_WINDOW +from morpheus_dfp.utils.cached_user_window import CachedUserWindow +from morpheus_dfp.utils.logging_timer import log_time +from morpheus_dfp.utils.module_ids import DFP_ROLLING_WINDOW logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_split_users.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_split_users.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py index a5757ac407..7ec8c7f0f4 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_split_users.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py @@ -17,7 +17,6 @@ import mrc import pandas as pd -from dfp.utils.logging_timer import log_time from mrc.core import operators as ops import cudf @@ -26,8 +25,8 @@ from morpheus.messages import MessageMeta from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_SPLIT_USERS +from morpheus_dfp.utils.logging_timer import log_time +from morpheus_dfp.utils.module_ids import DFP_SPLIT_USERS logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py similarity index 98% rename from 
examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py index aae735e413..6bc41d1d09 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py @@ -25,8 +25,7 @@ from morpheus.models.dfencoder import AutoEncoder from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_TRAINING +from morpheus_dfp.utils.module_ids import DFP_TRAINING logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training_pipe.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training_pipe.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py index 9fcbd946af..cfa4c49fdc 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training_pipe.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py @@ -21,12 +21,11 @@ from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import merge_dictionaries from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_DATA_PREP -from ..utils.module_ids import DFP_PREPROC -from ..utils.module_ids import DFP_ROLLING_WINDOW -from ..utils.module_ids import DFP_TRAINING -from ..utils.module_ids import DFP_TRAINING_PIPE +from morpheus_dfp.utils.module_ids import DFP_DATA_PREP +from morpheus_dfp.utils.module_ids import DFP_PREPROC +from morpheus_dfp.utils.module_ids import DFP_ROLLING_WINDOW +from morpheus_dfp.utils.module_ids import DFP_TRAINING +from morpheus_dfp.utils.module_ids import DFP_TRAINING_PIPE logger = logging.getLogger(f"morpheus.{__name__}") 
diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/__init__.py b/python/morpheus_dfp/morpheus_dfp/py.typed similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/__init__.py rename to python/morpheus_dfp/morpheus_dfp/py.typed diff --git a/python/morpheus_dfp/morpheus_dfp/stages/__init__.py b/python/morpheus_dfp/morpheus_dfp/stages/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_file_batcher_stage.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_file_batcher_stage.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_to_df.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_file_to_df.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_to_df.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_file_to_df.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_inference_stage.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_inference_stage.py index 7d37c9514d..0324a5e67f 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py +++ b/python/morpheus_dfp/morpheus_dfp/stages/dfp_inference_stage.py @@ -25,9 +25,8 @@ from morpheus.messages import ControlMessage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stage_schema import StageSchema - -from ..utils.model_cache import ModelCache -from ..utils.model_cache import ModelManager 
+from morpheus_dfp.utils.model_cache import ModelCache +from morpheus_dfp.utils.model_cache import ModelManager logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_mlflow_model_writer.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_mlflow_model_writer.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_mlflow_model_writer.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_mlflow_model_writer.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_postprocessing_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_postprocessing_stage.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_postprocessing_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_postprocessing_stage.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_preprocessing_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_preprocessing_stage.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_preprocessing_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_preprocessing_stage.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_rolling_window_stage.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_rolling_window_stage.py index 9acc1ea151..7ef67ec88c 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py +++ b/python/morpheus_dfp/morpheus_dfp/stages/dfp_rolling_window_stage.py @@ -29,9 +29,8 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.single_port_stage import SinglePortStage from 
morpheus.pipeline.stage_schema import StageSchema - -from ..utils.cached_user_window import CachedUserWindow -from ..utils.logging_timer import log_time +from morpheus_dfp.utils.cached_user_window import CachedUserWindow +from morpheus_dfp.utils.logging_timer import log_time logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_split_users_stage.py similarity index 99% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_split_users_stage.py index cd687e0a2c..e88b3dfc49 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py +++ b/python/morpheus_dfp/morpheus_dfp/stages/dfp_split_users_stage.py @@ -28,8 +28,7 @@ from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.type_aliases import DataFrameType - -from ..utils.logging_timer import log_time +from morpheus_dfp.utils.logging_timer import log_time logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_training.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_training.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_viz_postproc.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_viz_postproc.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_viz_postproc.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_viz_postproc.py diff --git 
a/examples/digital_fingerprinting/production/morpheus/dfp/stages/multi_file_source.py b/python/morpheus_dfp/morpheus_dfp/stages/multi_file_source.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/multi_file_source.py rename to python/morpheus_dfp/morpheus_dfp/stages/multi_file_source.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/write_to_s3_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/write_to_s3_stage.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/write_to_s3_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/write_to_s3_stage.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/__init__.py b/python/morpheus_dfp/morpheus_dfp/utils/__init__.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/__init__.py rename to python/morpheus_dfp/morpheus_dfp/utils/__init__.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/cached_user_window.py b/python/morpheus_dfp/morpheus_dfp/utils/cached_user_window.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/cached_user_window.py rename to python/morpheus_dfp/morpheus_dfp/utils/cached_user_window.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py b/python/morpheus_dfp/morpheus_dfp/utils/config_generator.py similarity index 96% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py rename to python/morpheus_dfp/morpheus_dfp/utils/config_generator.py index 2da7c6cdc0..036e2c90eb 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py +++ b/python/morpheus_dfp/morpheus_dfp/utils/config_generator.py @@ -14,18 +14,17 @@ import os -from dfp.utils.dfp_arg_parser import DFPArgParser -from dfp.utils.dfp_arg_parser import 
pyobj2str -from dfp.utils.module_ids import DFP_DEPLOYMENT -from dfp.utils.regex_utils import iso_date_regex_pattern -from dfp.utils.schema_utils import Schema - from morpheus.cli.utils import get_package_relative_file from morpheus.config import Config from morpheus.config import ConfigAutoEncoder from morpheus.messages import ControlMessage from morpheus.utils.file_utils import load_labels_file from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE +from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser +from morpheus_dfp.utils.dfp_arg_parser import pyobj2str +from morpheus_dfp.utils.module_ids import DFP_DEPLOYMENT +from morpheus_dfp.utils.regex_utils import iso_date_regex_pattern +from morpheus_dfp.utils.schema_utils import Schema class ConfigGenerator: diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py b/python/morpheus_dfp/morpheus_dfp/utils/dfp_arg_parser.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py rename to python/morpheus_dfp/morpheus_dfp/utils/dfp_arg_parser.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/logging_timer.py b/python/morpheus_dfp/morpheus_dfp/utils/logging_timer.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/logging_timer.py rename to python/morpheus_dfp/morpheus_dfp/utils/logging_timer.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py b/python/morpheus_dfp/morpheus_dfp/utils/model_cache.py similarity index 99% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py rename to python/morpheus_dfp/morpheus_dfp/utils/model_cache.py index 54b7c57d11..e3150725d5 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py +++ b/python/morpheus_dfp/morpheus_dfp/utils/model_cache.py @@ -26,8 +26,7 @@ from mlflow.tracking.client import 
MlflowClient from morpheus.models.dfencoder import AutoEncoder - -from .logging_timer import log_time +from morpheus_dfp.utils.logging_timer import log_time logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/module_ids.py b/python/morpheus_dfp/morpheus_dfp/utils/module_ids.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/module_ids.py rename to python/morpheus_dfp/morpheus_dfp/utils/module_ids.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/regex_utils.py b/python/morpheus_dfp/morpheus_dfp/utils/regex_utils.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/regex_utils.py rename to python/morpheus_dfp/morpheus_dfp/utils/regex_utils.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/schema_utils.py b/python/morpheus_dfp/morpheus_dfp/utils/schema_utils.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/schema_utils.py rename to python/morpheus_dfp/morpheus_dfp/utils/schema_utils.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/user_model_manager.py b/python/morpheus_dfp/morpheus_dfp/utils/user_model_manager.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/user_model_manager.py rename to python/morpheus_dfp/morpheus_dfp/utils/user_model_manager.py diff --git a/python/morpheus_dfp/setup.cfg b/python/morpheus_dfp/setup.cfg new file mode 100644 index 0000000000..ade54b665b --- /dev/null +++ b/python/morpheus_dfp/setup.cfg @@ -0,0 +1,26 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +# ===== versioneer Config ===== +[versioneer] +VCS = git +style = pep440 +versionfile_source = morpheus_dfp/_version.py +versionfile_build = morpheus_dfp/_version.py +tag_prefix = v +parentdir_prefix = morpheus_dfp- + diff --git a/python/morpheus_dfp/setup.py b/python/morpheus_dfp/setup.py new file mode 100644 index 0000000000..37e5aecbfb --- /dev/null +++ b/python/morpheus_dfp/setup.py @@ -0,0 +1,45 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer setup' after changing this section, and commit the +# resulting files. 
+ +import versioneer +from setuptools import find_packages # noqa: E402 +from setuptools import setup # noqa: E402 + +setup( + name="morpheus_dfp", + version=versioneer.get_version(), # pylint: disable=no-member + description="Morpheus Digital Finger Printing", + classifiers=[ + "Development Status :: 3 - Alpha", + "Environment :: GPU :: NVIDIA CUDA", + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Security", + "Topic :: System :: Networking :: Monitoring", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3 :: Only", + ], + author="NVIDIA Corporation", + include_package_data=True, + packages=find_packages(), + install_requires=[], + license="Apache", + python_requires='>=3.10, <4', + cmdclass=versioneer.get_cmdclass(), # pylint: disable=no-member +) diff --git a/python/morpheus_dfp/todo.md b/python/morpheus_dfp/todo.md new file mode 100644 index 0000000000..08f15bbf70 --- /dev/null +++ b/python/morpheus_dfp/todo.md @@ -0,0 +1,55 @@ + + +# Tasks +- [X] Refactor Morpheus production stages + +- [X] Add morpheus_dfp Conda recipe + +- [X] Update CI to build and upload morpheus_dfp Conda package + +- [ ] Update docs to include the DFP APIs and README for using the DFP library + +- [ ] Move DFP unit tests from tests/examples/digital_fingerprinting to tests/morpheus_dfp + +- [ ] Refactor DFP benchmarks + +- [ ] Update DFP example docker file to install the morpheus_dfp package instead of using the Morpheus image as base + +- [ ] Consolidate version file used in setup.py across all Morpheus packages + + +# Q&A for future reference +1. Do we refactor sample pipelines to python/morpheus_dfp/morpheus_dfp/pipeline? + No. They are not part of the library. They are just examples. + +2.
Do we refactor data (just the script for pulling the data, fetch_example_data.py) used for running the sample DFP pipelines? + No. Same as above. + +3. Do we refactor Morpheus DFP starter example? + No. Starter will be dropped, #1715 + +4. Visualizations? + No. Sample pipeline. + +5. Demo? + No. Sample pipeline. + +6. Refactor notebooks? + No. Sample only. + +7. Refactor DFP example control messages? + No. diff --git a/python/morpheus_llm/CMakeLists.txt b/python/morpheus_llm/CMakeLists.txt index 2383ee8ec3..d2093e759c 100644 --- a/python/morpheus_llm/CMakeLists.txt +++ b/python/morpheus_llm/CMakeLists.txt @@ -16,7 +16,6 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "morpheus_llm") # Save the root of the python for relative paths -# Where is this used, fixme set(MORPHEUS_LLM_PY_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..) @@ -25,8 +24,7 @@ set(MORPHEUS_LLM_PY_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..) # ################################# morpheus_utils_python_configure() -# Include the main morpheus code -# fixme: add another subdirectory src and change SOURCE_DIRECTORY to ${CMAKE_CURRENT_SOURCE_DIR}/src +# Include the main morpheus_llm code morpheus_utils_create_python_package(morpheus_llm PROJECT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/morpheus_llm" @@ -34,7 +32,7 @@ morpheus_utils_create_python_package(morpheus_llm add_subdirectory(morpheus_llm) -# Complete the python package - fixme do these need to be redefined +# Complete the python package if(MORPHEUS_PYTHON_INPLACE_BUILD) list(APPEND extra_args "IS_INPLACE") endif() diff --git a/python/morpheus_llm/morpheus_llm/_lib/cmake/libmorpheus_llm.cmake b/python/morpheus_llm/morpheus_llm/_lib/cmake/libmorpheus_llm.cmake index d4377c64db..83ba243398 100644 --- a/python/morpheus_llm/morpheus_llm/_lib/cmake/libmorpheus_llm.cmake +++ b/python/morpheus_llm/morpheus_llm/_lib/cmake/libmorpheus_llm.cmake @@ -27,42 +27,32 @@ add_library(morpheus_llm add_library(${PROJECT_NAME}::morpheus_llm ALIAS 
morpheus_llm) -# fixme check if all these are needed +# morpheus_llm can be built two ways - +# 1. For development purposes (e.g. scripts/compile.sh) all the functional blocks are built. +# This includes morpheus (core), morpheus_llm, morpheus_dfp etc. In this case we +# set dependencies on build targets across components. +# 2. For conda packaging purposes morpheus_llm is built on its own. In this case +# the dependencies (including morpheus-core) are loaded from the conda environment. +if (MORPHEUS_BUILD_MORPHEUS_CORE) + # Add a dependency on the morpheus cpython libraries + get_property(py_morpheus_target GLOBAL PROPERTY py_morpheus_target_property) + add_dependencies(morpheus_llm ${py_morpheus_target}) +else() + rapids_find_package(morpheus REQUIRED) +endif() + target_link_libraries(morpheus_llm PRIVATE - matx::matx $<$:ZLIB::ZLIB> PUBLIC $ cudf::cudf - CUDA::nvtx3 mrc::pymrc ${PROJECT_NAME}::morpheus - ) -# Add the include directories of the cudf_helpers_project since we dont want to link directly to it -get_property(cudf_helpers_target GLOBAL PROPERTY cudf_helpers_target_property) -get_target_property(cudf_helpers_include ${cudf_helpers_target} INTERFACE_INCLUDE_DIRECTORIES) - -target_include_directories(morpheus - PRIVATE - ${cudf_helpers_include} -) - -# Also add a dependency to the target so that the headers are generated before the target is built -add_dependencies(morpheus_llm ${cudf_helpers_target}) -message("add_dependencies morpheus_llm ${cudf_helpers_target}") - -# Add a dependency on the morpheus cpython libraries -get_property(py_morpheus_target GLOBAL PROPERTY py_morpheus_target_property) -add_dependencies(morpheus_llm ${py_morpheus_target}) -message("add_dependencies morpheus_llm ${py_morpheus_target}") - -# fixme: find another way to include morpheus headers target_include_directories(morpheus_llm PUBLIC - $ $ $ ) diff --git a/python/morpheus_llm/morpheus_llm/error.py b/python/morpheus_llm/morpheus_llm/error.py new file mode 100644 index
0000000000..2505d987dd --- /dev/null +++ b/python/morpheus_llm/morpheus_llm/error.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IMPORT_ERROR_MESSAGE = ( + "{package} not found. Install it and other additional dependencies by running the following command:\n" + "`conda env update --solver=libmamba -n morpheus " + "--file conda/environments/examples_cuda-121_arch-x86_64.yaml`") diff --git a/python/morpheus_llm/morpheus_llm/llm/nodes/langchain_agent_node.py b/python/morpheus_llm/morpheus_llm/llm/nodes/langchain_agent_node.py index e63b1d351c..0e96c600fd 100644 --- a/python/morpheus_llm/morpheus_llm/llm/nodes/langchain_agent_node.py +++ b/python/morpheus_llm/morpheus_llm/llm/nodes/langchain_agent_node.py @@ -16,13 +16,19 @@ import logging import typing -from langchain_core.exceptions import OutputParserException - +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.llm import LLMContext from morpheus_llm.llm import LLMNodeBase logger = logging.getLogger(__name__) +IMPORT_EXCEPTION = None + +try: + from langchain_core.exceptions import OutputParserException +except ImportError as import_exc: + IMPORT_EXCEPTION = import_exc + if typing.TYPE_CHECKING: from langchain.agents import AgentExecutor @@ -47,6 +53,9 @@ def __init__(self, agent_executor: "AgentExecutor", replace_exceptions: bool = False, replace_exceptions_value: typing.Optional[str] = None): + if IMPORT_EXCEPTION is not None: + 
raise ImportError(IMPORT_ERROR_MESSAGE.format(package='langchain_core')) from IMPORT_EXCEPTION + super().__init__() self._agent_executor = agent_executor diff --git a/python/morpheus_llm/morpheus_llm/llm/services/nemo_llm_service.py b/python/morpheus_llm/morpheus_llm/llm/services/nemo_llm_service.py index ef80814929..30cda8e02c 100644 --- a/python/morpheus_llm/morpheus_llm/llm/services/nemo_llm_service.py +++ b/python/morpheus_llm/morpheus_llm/llm/services/nemo_llm_service.py @@ -18,16 +18,13 @@ import warnings from morpheus.utils.env_config_value import EnvConfigValue +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.llm.services.llm_service import LLMClient from morpheus_llm.llm.services.llm_service import LLMService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = ( - "NemoLLM not found. Install it and other additional dependencies by running the following command:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/examples_cuda-121_arch-x86_64.yaml --prune`") try: import nemollm @@ -53,7 +50,7 @@ class NeMoLLMClient(LLMClient): def __init__(self, parent: "NeMoLLMService", *, model_name: str, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='nemollm')) from IMPORT_EXCEPTION super().__init__() @@ -231,7 +228,7 @@ def __init__(self, """ if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='nemollm')) from IMPORT_EXCEPTION super().__init__() diff --git a/python/morpheus_llm/morpheus_llm/llm/services/nvfoundation_llm_service.py b/python/morpheus_llm/morpheus_llm/llm/services/nvfoundation_llm_service.py index d1e706b7c2..709f394712 100644 --- a/python/morpheus_llm/morpheus_llm/llm/services/nvfoundation_llm_service.py +++
b/python/morpheus_llm/morpheus_llm/llm/services/nvfoundation_llm_service.py @@ -16,17 +16,13 @@ import typing from morpheus.utils.env_config_value import EnvConfigValue +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.llm.services.llm_service import LLMClient from morpheus_llm.llm.services.llm_service import LLMService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = ( - "The `langchain-nvidia-ai-endpoints` package was not found. Install it and other additional dependencies by " - "running the following command:" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/examples_cuda-121_arch-x86_64.yaml`") try: from langchain_core.prompt_values import StringPromptValue @@ -52,7 +48,8 @@ class NVFoundationLLMClient(LLMClient): def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError( + IMPORT_ERROR_MESSAGE.format(package='langchain-nvidia-ai-endpoints')) from IMPORT_EXCEPTION super().__init__() @@ -218,7 +215,8 @@ class BaseURL(EnvConfigValue): def __init__(self, *, api_key: APIKey | str = None, base_url: BaseURL | str = None, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError( + IMPORT_ERROR_MESSAGE.format(package='langchain-nvidia-ai-endpoints')) from IMPORT_EXCEPTION super().__init__() diff --git a/python/morpheus_llm/morpheus_llm/llm/services/openai_chat_service.py b/python/morpheus_llm/morpheus_llm/llm/services/openai_chat_service.py index d4eaac4503..2df6048d5a 100644 --- a/python/morpheus_llm/morpheus_llm/llm/services/openai_chat_service.py +++ b/python/morpheus_llm/morpheus_llm/llm/services/openai_chat_service.py @@ -23,16 +23,13 @@ import appdirs from morpheus.utils.env_config_value import EnvConfigValue +from morpheus_llm.error 
import IMPORT_ERROR_MESSAGE from morpheus_llm.llm.services.llm_service import LLMClient from morpheus_llm.llm.services.llm_service import LLMService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = ("OpenAIChatService & OpenAIChatClient require the openai package to be installed. " - "Install it by running the following command:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/examples_cuda-121_arch-x86_64.yaml --prune`") try: import openai @@ -107,7 +104,7 @@ def __init__(self, json=False, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='openai')) from IMPORT_EXCEPTION super().__init__() @@ -400,7 +397,7 @@ def __init__(self, default_model_kwargs: dict = None) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='openai')) from IMPORT_EXCEPTION super().__init__() diff --git a/python/morpheus_llm/morpheus_llm/service/vdb/faiss_vdb_service.py b/python/morpheus_llm/morpheus_llm/service/vdb/faiss_vdb_service.py index 4876c6b139..8a31ed8085 100644 --- a/python/morpheus_llm/morpheus_llm/service/vdb/faiss_vdb_service.py +++ b/python/morpheus_llm/morpheus_llm/service/vdb/faiss_vdb_service.py @@ -18,13 +18,13 @@ import typing from morpheus.utils.type_aliases import DataFrameType +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.service.vdb.vector_db_service import VectorDBResourceService from morpheus_llm.service.vdb.vector_db_service import VectorDBService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = "FaissDBResourceService requires the FAISS library to be installed." 
try: from langchain.embeddings.base import Embeddings @@ -47,7 +47,7 @@ class FaissVectorDBResourceService(VectorDBResourceService): def __init__(self, parent: "FaissVectorDBService", *, name: str) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='langchain and faiss-gpu')) from IMPORT_EXCEPTION super().__init__() @@ -282,7 +282,7 @@ class FaissVectorDBService(VectorDBService): def __init__(self, local_dir: str, embeddings: "Embeddings"): if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='langchain and faiss-gpu')) from IMPORT_EXCEPTION self._local_dir = local_dir self._embeddings = embeddings diff --git a/python/morpheus_llm/morpheus_llm/service/vdb/milvus_vector_db_service.py b/python/morpheus_llm/morpheus_llm/service/vdb/milvus_vector_db_service.py index d9a378ceef..5c3f020aea 100644 --- a/python/morpheus_llm/morpheus_llm/service/vdb/milvus_vector_db_service.py +++ b/python/morpheus_llm/morpheus_llm/service/vdb/milvus_vector_db_service.py @@ -24,13 +24,13 @@ from morpheus.io.utils import truncate_string_cols_by_bytes from morpheus.utils.type_aliases import DataFrameType from morpheus.utils.type_utils import is_cudf_type +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.service.vdb.vector_db_service import VectorDBResourceService from morpheus_llm.service.vdb.vector_db_service import VectorDBService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = "MilvusVectorDBResourceService requires the milvus and pymilvus packages to be installed." # Milvus has a max string length in bytes of 65,535. 
Multi-byte characters like "ñ" will have a string length of 1, the # byte length encoded as UTF-8 will be 2 @@ -233,7 +233,7 @@ class MilvusVectorDBResourceService(VectorDBResourceService): def __init__(self, name: str, client: "MilvusClient", truncate_long_strings: bool = False) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='pymilvus')) from IMPORT_EXCEPTION super().__init__() diff --git a/scripts/compile.sh b/scripts/compile.sh index cf8c628515..f0fc750b75 100755 --- a/scripts/compile.sh +++ b/scripts/compile.sh @@ -33,7 +33,9 @@ cmake -S . -B ${BUILD_DIR} -GNinja \ -DMORPHEUS_USE_CCACHE=ON \ -DMORPHEUS_USE_CONDA=${MORPHEUS_USE_CONDA:-ON} \ -DMORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF} \ + -DMORPHEUS_BUILD_MORPHEUS_CORE=${MORPHEUS_BUILD_MORPHEUS_CORE:-ON} \ -DMORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-ON} \ + -DMORPHEUS_BUILD_MORPHEUS_DFP=${MORPHEUS_BUILD_MORPHEUS_DFP:-ON} \ ${INSTALL_PREFIX:+-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}} \ ${CMAKE_ARGS:+${CMAKE_ARGS}} \ ${CMAKE_CONFIGURE_EXTRA_ARGS:+${CMAKE_CONFIGURE_EXTRA_ARGS}} diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py index 1e21affaa8..607febf434 100644 --- a/tests/benchmarks/conftest.py +++ b/tests/benchmarks/conftest.py @@ -20,8 +20,13 @@ from unittest import mock import pytest -from pynvml.smi import NVSMI_QUERY_GPU -from pynvml.smi import nvidia_smi + +try: + from pynvml.smi import NVSMI_QUERY_GPU + from pynvml.smi import nvidia_smi +except ImportError: + print("pynvml is not installed") + from test_bench_e2e_pipelines import E2E_TEST_CONFIGS diff --git a/tests/benchmarks/test_bench_agents_simple_pipeline.py b/tests/benchmarks/test_bench_agents_simple_pipeline.py index 05289cde04..ffad11dc78 100644 --- a/tests/benchmarks/test_bench_agents_simple_pipeline.py +++ b/tests/benchmarks/test_bench_agents_simple_pipeline.py @@ -19,13 +19,17 @@ import 
typing from unittest import mock -import langchain import pytest -from langchain.agents import AgentType -from langchain.agents import initialize_agent -from langchain.agents import load_tools -from langchain.agents.tools import Tool -from langchain.utilities import serpapi + +try: + import langchain + from langchain.agents import AgentType + from langchain.agents import initialize_agent + from langchain.agents import load_tools + from langchain.agents.tools import Tool + from langchain.utilities import serpapi +except ImportError: + print("langchain is not installed") import cudf diff --git a/tests/conftest.py b/tests/conftest.py index b047798e58..88a362bbe5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -50,6 +50,10 @@ from _utils.kafka import kafka_server # noqa: F401 pylint:disable=unused-import from _utils.kafka import zookeeper_proc # noqa: F401 pylint:disable=unused-import +OPT_DEP_SKIP_REASON = ( + "This test requires the {package} package to be installed, to install this run:\n" + "`conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-121_arch-x86_64.yaml`") + def pytest_addoption(parser: pytest.Parser): """ @@ -1040,33 +1044,53 @@ def nemollm_fixture(fail_missing: bool): """ Fixture to ensure nemollm is installed """ - skip_reason = ("Tests for the NeMoLLMService require the nemollm package to be installed, to install this run:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`") - yield import_or_skip("nemollm", reason=skip_reason, fail_missing=fail_missing) + yield import_or_skip("nemollm", reason=OPT_DEP_SKIP_REASON.format(package="nemollm"), fail_missing=fail_missing) -@pytest.fixture(name="nvfoundationllm", scope='session') -def nvfoundationllm_fixture(fail_missing: bool): +@pytest.fixture(name="openai", scope='session') +def openai_fixture(fail_missing: bool): """ - Fixture to ensure nvfoundationllm is installed + Fixture to ensure 
openai is installed """ - skip_reason = ( - "Tests for NVFoundation require the langchain-nvidia-ai-endpoints package to be installed, to install this " - "run:\n `conda env update --solver=libmamba -n morpheus " - "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`") - yield import_or_skip("langchain_nvidia_ai_endpoints", reason=skip_reason, fail_missing=fail_missing) + yield import_or_skip("openai", reason=OPT_DEP_SKIP_REASON.format(package="openai"), fail_missing=fail_missing) -@pytest.fixture(name="openai", scope='session') -def openai_fixture(fail_missing: bool): +@pytest.fixture(name="langchain", scope='session') +def langchain_fixture(fail_missing: bool): """ - Fixture to ensure openai is installed + Fixture to ensure langchain is installed + """ + yield import_or_skip("langchain", reason=OPT_DEP_SKIP_REASON.format(package="langchain"), fail_missing=fail_missing) + + +@pytest.fixture(name="langchain_core", scope='session') +def langchain_core_fixture(fail_missing: bool): + """ + Fixture to ensure langchain_core is installed + """ + yield import_or_skip("langchain_core", + reason=OPT_DEP_SKIP_REASON.format(package="langchain_core"), + fail_missing=fail_missing) + + +@pytest.fixture(name="langchain_community", scope='session') +def langchain_community_fixture(fail_missing: bool): + """ + Fixture to ensure langchain_community is installed + """ + yield import_or_skip("langchain_community", + reason=OPT_DEP_SKIP_REASON.format(package="langchain_community"), + fail_missing=fail_missing) + + +@pytest.fixture(name="langchain_nvidia_ai_endpoints", scope='session') +def langchain_nvidia_ai_endpoints_fixture(fail_missing: bool): + """ + Fixture to ensure langchain_nvidia_ai_endpoints is installed """ - skip_reason = ("Tests for the OpenAIChatService require the openai package to be installed, to install this run:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`") - yield 
import_or_skip("openai", reason=skip_reason, fail_missing=fail_missing) + yield import_or_skip("langchain_nvidia_ai_endpoints", + reason=OPT_DEP_SKIP_REASON.format(package="langchain_nvidia_ai_endpoints"), + fail_missing=fail_missing) @pytest.mark.usefixtures("openai") diff --git a/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py b/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py index 6120867926..fd94312f97 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py @@ -48,7 +48,7 @@ def file_specs_fixture(test_data_dir: str): def test_constructor(config: Config): - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage def date_conversion_func(x): return x @@ -71,7 +71,7 @@ def date_conversion_func(x): def test_constructor_deprecated_args(config: Config): """Test that the deprecated sampling_rate_s arg is still supported""" - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage with pytest.deprecated_call(): stage = DFPFileBatcherStage(config, lambda x: x, sampling_rate_s=55) @@ -82,14 +82,14 @@ def test_constructor_deprecated_args(config: Config): def test_constructor_both_sample_args_error(config: Config): """Test that an error is raised if both sampling and sampling_rate_s are specified""" - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage with pytest.raises(AssertionError): DFPFileBatcherStage(config, lambda x: x, sampling=55, sampling_rate_s=20) def test_on_data(config: Config, date_conversion_func: typing.Callable, file_specs: typing.List[fsspec.core.OpenFile]): - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from 
morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func) assert not stage.on_data([]) @@ -108,7 +108,7 @@ def test_on_data_two_batches(config: Config, file_specs: typing.List[fsspec.core.OpenFile], test_data_dir: str): # Test with a one-minute window which should split the data into two batches - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func, period='min') batches = stage.on_data(file_specs) assert len(batches) == 2 @@ -131,7 +131,7 @@ def test_on_data_start_time(config: Config, file_specs: typing.List[fsspec.core.OpenFile], test_data_dir: str): # Test with a start time that excludes some files - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func, period='min', @@ -151,7 +151,7 @@ def test_on_data_end_time(config: Config, file_specs: typing.List[fsspec.core.OpenFile], test_data_dir: str): # Test with a end time that excludes some files - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func, period='min', @@ -172,7 +172,7 @@ def test_on_data_start_time_end_time(config: Config, file_specs: typing.List[fsspec.core.OpenFile], test_data_dir: str): # Test with a start & end time that excludes some files - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func, period='min', diff --git a/tests/examples/digital_fingerprinting/test_dfp_file_to_df.py b/tests/examples/digital_fingerprinting/test_dfp_file_to_df.py index 
19fa6add61..557818d8c7 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_file_to_df.py +++ b/tests/examples/digital_fingerprinting/test_dfp_file_to_df.py @@ -77,7 +77,7 @@ def test_single_object_to_dataframe_timeout(): @pytest.mark.usefixtures("restore_environ") def test_constructor(config: Config): - from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage + from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage # The user may have this already set, ensure it is undefined os.environ.pop('MORPHEUS_FILE_DOWNLOAD_TYPE', None) @@ -118,7 +118,7 @@ def test_get_or_create_dataframe_from_batch_cache_miss(mock_proc_df: mock.MagicM tmp_path: str, single_file_obj: fsspec.core.OpenFile, dataset_pandas: DatasetManager): - from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage + from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage config.ae.timestamp_column_name = 'v1' mock_dask_cluster.return_value = mock_dask_cluster mock_dask_client.return_value = mock_dask_client @@ -195,7 +195,7 @@ def test_get_or_create_dataframe_from_batch_cache_hit(mock_obf_to_df: mock.Magic use_convert_to_dataframe: bool, tmp_path: str, dataset_pandas: DatasetManager): - from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage + from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage config.ae.timestamp_column_name = 'v1' mock_dask_cluster.return_value = mock_dask_cluster mock_dask_client.return_value = mock_dask_client @@ -253,7 +253,7 @@ def test_get_or_create_dataframe_from_batch_none_noop(mock_obf_to_df: mock.Magic dl_type: str, use_convert_to_dataframe: bool, tmp_path: str): - from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage + from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage mock_dask_cluster.return_value = mock_dask_cluster mock_dask_client.return_value = mock_dask_client diff --git a/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py 
b/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py index 1175d0a61e..722fadb993 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py @@ -30,20 +30,20 @@ @pytest.fixture(name="mock_mlflow_client", autouse=True) def mock_mlflow_client_fixture(): - with mock.patch("dfp.stages.dfp_inference_stage.MlflowClient") as mock_mlflow_client: + with mock.patch("morpheus_dfp.stages.dfp_inference_stage.MlflowClient") as mock_mlflow_client: mock_mlflow_client.return_value = mock_mlflow_client yield mock_mlflow_client @pytest.fixture(name="mock_model_manager", autouse=True) def mock_model_manager_fixture(): - with mock.patch("dfp.stages.dfp_inference_stage.ModelManager") as mock_model_manager: + with mock.patch("morpheus_dfp.stages.dfp_inference_stage.ModelManager") as mock_model_manager: mock_model_manager.return_value = mock_model_manager yield mock_model_manager def test_constructor(config: Config, mock_mlflow_client: mock.MagicMock, mock_model_manager: mock.MagicMock): - from dfp.stages.dfp_inference_stage import DFPInferenceStage + from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage stage = DFPInferenceStage(config, model_name_formatter="test_model_name-{user_id}-{user_md5}") @@ -58,7 +58,7 @@ def test_constructor(config: Config, mock_mlflow_client: mock.MagicMock, mock_mo def test_get_model(config: Config, mock_mlflow_client: mock.MagicMock, mock_model_manager: mock.MagicMock): - from dfp.stages.dfp_inference_stage import DFPInferenceStage + from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage mock_model_cache = mock.MagicMock() mock_model_manager.load_user_model.return_value = mock_model_cache @@ -80,7 +80,7 @@ def test_on_data( control_message: "ControlMessage", # noqa: F821 log_level: int, dataset_pandas: DatasetManager): - from dfp.stages.dfp_inference_stage import DFPInferenceStage + from 
morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage set_log_level(log_level) @@ -116,7 +116,7 @@ def test_on_data_get_model_error( mock_model_manager: mock.MagicMock, control_message: "ControlMessage", # noqa: F821 raise_error: bool): - from dfp.stages.dfp_inference_stage import DFPInferenceStage + from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage # There are two error conditions that can occur in get_model can return None or raise an error if raise_error: diff --git a/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py b/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py index 02508eb7b3..39dcfd7d6b 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py +++ b/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py @@ -111,7 +111,7 @@ def mock_mlflow(): def test_constructor(config: Config): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage stage = DFPMLFlowModelWriterStage(config, model_name_formatter="test_model_name-{user_id}-{user_md5}", @@ -132,7 +132,7 @@ def test_constructor(config: Config): ("test_model_name-{user_id}-{user_md5}", 'test_城安宮川', "test_model_name-test_城安宮川-c9acc3dec97777c8b6fd8ae70a744ea8") ]) def test_user_id_to_model(config: Config, model_name_formatter: str, user_id: str, expected_val: str): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage stage = DFPMLFlowModelWriterStage(config, model_name_formatter=model_name_formatter) assert stage._controller.user_id_to_model(user_id) == expected_val @@ -150,7 +150,7 @@ def test_user_id_to_model(config: Config, model_name_formatter: str, user_id: st 'test_城安宮川', "/test/expr/dfp-test_城安宮川-test_城安宮川-c9acc3dec97777c8b6fd8ae70a744ea8")]) def test_user_id_to_experiment(config: 
Config, experiment_name_formatter: str, user_id: str, expected_val: str): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage stage = DFPMLFlowModelWriterStage(config, model_name_formatter="dfp-{user_id}", @@ -179,7 +179,7 @@ def verify_apply_model_permissions(mock_requests: MockedRequests, def test_apply_model_permissions(config: Config, databricks_env: dict, mock_requests: MockedRequests): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage databricks_permissions = OrderedDict([('group1', 'CAN_READ'), ('group2', 'CAN_WRITE')]) stage = DFPMLFlowModelWriterStage(config, databricks_permissions=databricks_permissions, timeout=10) stage._controller._apply_model_permissions("test_experiment") @@ -207,7 +207,7 @@ def test_apply_model_permissions_no_perms_error(config: Config, else: os.environ.pop("DATABRICKS_TOKEN", None) - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage stage = DFPMLFlowModelWriterStage(config) with pytest.raises(RuntimeError): stage._controller._apply_model_permissions("test_experiment") @@ -218,7 +218,7 @@ def test_apply_model_permissions_no_perms_error(config: Config, @pytest.mark.usefixtures("databricks_env") def test_apply_model_permissions_requests_error(config: Config, mock_requests: MockedRequests): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage mock_requests.get.side_effect = RuntimeError("test error") stage = DFPMLFlowModelWriterStage(config, timeout=10) @@ -239,8 +239,8 @@ def test_on_data( databricks_env: dict, databricks_permissions: dict, tracking_uri: str): - from dfp.stages.dfp_mlflow_model_writer import 
DFPMLFlowModelWriterStage - from dfp.stages.dfp_mlflow_model_writer import conda_env + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import conda_env should_apply_permissions = (databricks_permissions is not None and tracking_uri == "databricks") diff --git a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py index b173c145dc..31176221a9 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py @@ -27,7 +27,7 @@ def test_constructor(config: Config): - from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage + from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage stage = DFPPostprocessingStage(config) assert isinstance(stage, SinglePortStage) assert stage._needed_columns['event_time'] == TypeId.STRING @@ -36,13 +36,13 @@ def test_constructor(config: Config): @pytest.mark.usefixtures("reset_loglevel") @pytest.mark.parametrize('use_on_data', [True, False]) @pytest.mark.parametrize('log_level', [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) -@mock.patch('dfp.stages.dfp_postprocessing_stage.datetime') +@mock.patch('morpheus_dfp.stages.dfp_postprocessing_stage.datetime') def test_process_events_on_data(mock_datetime: mock.MagicMock, config: Config, control_message: ControlMessage, use_on_data: bool, log_level: int): - from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage + from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage mock_dt_obj = mock.MagicMock() mock_dt_obj.strftime.return_value = '2021-01-01T00:00:00Z' @@ -68,7 +68,7 @@ def test_process_events_on_data(mock_datetime: mock.MagicMock, def test_on_data_none(config: Config): - from dfp.stages.dfp_postprocessing_stage 
import DFPPostprocessingStage + from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage stage = DFPPostprocessingStage(config) assert stage.on_data(None) is None mock_payload = mock.MagicMock() diff --git a/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py index 538e20425e..49a2feea90 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py @@ -28,7 +28,7 @@ def test_constructor(config: Config): - from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage + from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage schema = DataFrameInputSchema() stage = DFPPreprocessingStage(config, input_schema=schema) @@ -43,7 +43,7 @@ def test_process_features( control_message: "ControlMessage", # noqa: F821 dataset_pandas: DatasetManager, log_level: int): - from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage + from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage set_log_level(log_level) diff --git a/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py b/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py index 15ad17c3cb..06d142f91c 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py @@ -47,7 +47,7 @@ def build_mock_user_cache(user_id: str = 'test_user', def test_constructor(config: Config): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') assert isinstance(stage, SinglePortStage) @@ -59,7 +59,7 @@ def test_constructor(config: 
Config): def test_get_user_cache_hit(config: Config): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -71,8 +71,8 @@ def test_get_user_cache_hit(config: Config): def test_get_user_cache_miss(config: Config): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage - from dfp.utils.cached_user_window import CachedUserWindow + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.utils.cached_user_window import CachedUserWindow config.ae.timestamp_column_name = 'test_timestamp_col' stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -88,7 +88,7 @@ def test_get_user_cache_miss(config: Config): def test_build_window_no_new(config: Config, control_message: ControlMessage): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -99,7 +99,7 @@ def test_build_window_no_new(config: Config, control_message: ControlMessage): def test_build_window_not_enough_data(config: Config, control_message: ControlMessage): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -109,7 +109,7 @@ def test_build_window_not_enough_data(config: Config, control_message: ControlMe def test_build_window_min_increment(config: Config, control_message: ControlMessage): - from dfp.stages.dfp_rolling_window_stage import 
DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -119,7 +119,7 @@ def test_build_window_min_increment(config: Config, control_message: ControlMess def test_build_window_invalid(config: Config, control_message: ControlMessage, train_df: pd.DataFrame): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -134,7 +134,7 @@ def test_build_window_invalid(config: Config, control_message: ControlMessage, t def test_build_window_overlap(config: Config, control_message: ControlMessage, train_df: pd.DataFrame): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -155,7 +155,7 @@ def test_build_window(config: Config, control_message: ControlMessage, dataset_pandas: DatasetManager, train_df: pd.DataFrame): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') diff --git a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py index e844c59c53..0dca35fd02 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py @@ -29,7 +29,7 @@ def test_constructor(config: Config): - from 
dfp.stages.dfp_split_users_stage import DFPSplitUsersStage + from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage stage = DFPSplitUsersStage(config, include_generic=False, include_individual=True) assert isinstance(stage, SinglePortStage) @@ -66,7 +66,7 @@ def test_extract_users(config: Config, include_individual: bool, skip_users: typing.List[str], only_users: typing.List[str]): - from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage + from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage config.ae.userid_column_name = "From" config.ae.fallback_username = "testy_testerson" ts_col = config.ae.timestamp_column_name @@ -136,7 +136,7 @@ def test_extract_users(config: Config, def test_extract_users_none_to_empty(config: Config): - from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage + from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage stage = DFPSplitUsersStage(config, include_generic=True, include_individual=True) assert not stage.extract_users(None) diff --git a/tests/examples/digital_fingerprinting/test_dfp_training.py b/tests/examples/digital_fingerprinting/test_dfp_training.py index e5194f3718..4408e3bd15 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_training.py +++ b/tests/examples/digital_fingerprinting/test_dfp_training.py @@ -27,7 +27,7 @@ def test_constructor(config: Config): - from dfp.stages.dfp_training import DFPTraining + from morpheus_dfp.stages.dfp_training import DFPTraining stage = DFPTraining(config, model_kwargs={'test': 'this'}, epochs=40, validation_size=0.5) assert isinstance(stage, SinglePortStage) @@ -38,21 +38,21 @@ def test_constructor(config: Config): @pytest.mark.parametrize('validation_size', [-1, -0.2, 1, 5]) def test_constructor_bad_validation_size(config: Config, validation_size: float): - from dfp.stages.dfp_training import DFPTraining + from morpheus_dfp.stages.dfp_training import DFPTraining with pytest.raises(ValueError): 
DFPTraining(config, validation_size=validation_size) @pytest.mark.parametrize('validation_size', [0., 0.2]) -@mock.patch('dfp.stages.dfp_training.AutoEncoder') -@mock.patch('dfp.stages.dfp_training.train_test_split') +@mock.patch('morpheus_dfp.stages.dfp_training.AutoEncoder') +@mock.patch('morpheus_dfp.stages.dfp_training.train_test_split') def test_on_data(mock_train_test_split: mock.MagicMock, mock_ae: mock.MagicMock, config: Config, dataset_pandas: DatasetManager, validation_size: float): - from dfp.stages.dfp_training import DFPTraining + from morpheus_dfp.stages.dfp_training import DFPTraining mock_ae.return_value = mock_ae diff --git a/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py b/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py index 571f976712..ea15b2caee 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py +++ b/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py @@ -54,7 +54,7 @@ def expected_df_fixture(config: Config, control_message: "ControlMessage"): # n def test_constructor(config: Config): - from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage + from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage stage = DFPVizPostprocStage(config, period='M', output_dir='/fake/test/dir', output_prefix='test_prefix') assert isinstance(stage, SinglePortStage) @@ -72,7 +72,7 @@ def test_postprocess( control_message: "ControlMessage", # noqa: F821 expected_df: pd.DataFrame, dataset_pandas: DatasetManager): - from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage + from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage # _postprocess doesn't write to disk, so the fake output_dir, shouldn't be an issue stage = DFPVizPostprocStage(config, period='min', output_dir='/fake/test/dir', output_prefix='test_prefix') @@ -88,7 +88,7 @@ def test_write_to_files( control_message: "ControlMessage", # noqa: F821 expected_df: pd.DataFrame, dataset_pandas: DatasetManager): - 
from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage + from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage stage = DFPVizPostprocStage(config, period='min', output_dir=tmp_path, output_prefix='test_prefix_') assert stage._write_to_files(control_message) is control_message diff --git a/tests/examples/digital_fingerprinting/test_multifile_source.py b/tests/examples/digital_fingerprinting/test_multifile_source.py index c430e10c12..e0f982ce60 100644 --- a/tests/examples/digital_fingerprinting/test_multifile_source.py +++ b/tests/examples/digital_fingerprinting/test_multifile_source.py @@ -25,7 +25,7 @@ def test_constructor(config: Config): - from dfp.stages.multi_file_source import MultiFileSource + from morpheus_dfp.stages.multi_file_source import MultiFileSource batch_size = 1234 n_threads = 13 @@ -43,7 +43,7 @@ def test_constructor(config: Config): def test_generate_frames_fsspec(config: Config, tmp_path: str): - from dfp.stages.multi_file_source import MultiFileSource + from morpheus_dfp.stages.multi_file_source import MultiFileSource file_glob = os.path.join(TEST_DIRS.tests_data_dir, 'appshield', 'snapshot-1', '*.json') temp_glob = os.path.join(tmp_path, '*.json') # this won't match anything @@ -65,7 +65,7 @@ def test_generate_frames_fsspec(config: Config, tmp_path: str): @mock.patch('time.sleep') def test_polling_generate_frames_fsspec(amock_time: mock.MagicMock, config: Config, tmp_path: str): - from dfp.stages.multi_file_source import MultiFileSource + from morpheus_dfp.stages.multi_file_source import MultiFileSource file_glob = os.path.join(TEST_DIRS.tests_data_dir, 'appshield', 'snapshot-1', '*.json') temp_glob = os.path.join(tmp_path, '*.json') # this won't match anything @@ -88,7 +88,7 @@ def test_polling_generate_frames_fsspec(amock_time: mock.MagicMock, config: Conf def test_generate_frames_fsspec_no_files(config: Config, tmp_path: str): - from dfp.stages.multi_file_source import MultiFileSource + from 
morpheus_dfp.stages.multi_file_source import MultiFileSource assert os.listdir(tmp_path) == [] diff --git a/tests/examples/digital_fingerprinting/test_write_to_s3_stage.py b/tests/examples/digital_fingerprinting/test_write_to_s3_stage.py index 6c951ff175..beab5e3f2a 100644 --- a/tests/examples/digital_fingerprinting/test_write_to_s3_stage.py +++ b/tests/examples/digital_fingerprinting/test_write_to_s3_stage.py @@ -20,7 +20,7 @@ def test_constructor(config: Config): - from dfp.stages.write_to_s3_stage import WriteToS3Stage + from morpheus_dfp.stages.write_to_s3_stage import WriteToS3Stage mock_s3_writer = mock.MagicMock() stage = WriteToS3Stage(config, s3_writer=mock_s3_writer) diff --git a/tests/examples/digital_fingerprinting/utils/test_config_generator.py b/tests/examples/digital_fingerprinting/utils/test_config_generator.py index 40d4f37b67..eb4c850f94 100644 --- a/tests/examples/digital_fingerprinting/utils/test_config_generator.py +++ b/tests/examples/digital_fingerprinting/utils/test_config_generator.py @@ -23,7 +23,7 @@ @pytest.fixture(name="dfp_arg_parser") def dfp_arg_parser_fixture(): - from dfp.utils.dfp_arg_parser import DFPArgParser + from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser dfp_arg_parser = DFPArgParser(skip_user=["unittest-skip-user"], only_user=["unittest-only-user"], start_time=datetime(1993, 4, 5, 6, 7, 8), @@ -43,13 +43,13 @@ def dfp_arg_parser_fixture(): @pytest.fixture(name="schema") def schema_fixture(config: Config): - from dfp.utils.schema_utils import SchemaBuilder + from morpheus_dfp.utils.schema_utils import SchemaBuilder schema_builder = SchemaBuilder(config, "duo") yield schema_builder.build_schema() def test_constructor(config: Config, dfp_arg_parser: "DFPArgParser", schema: "Schema"): # noqa: F821 - from dfp.utils.config_generator import ConfigGenerator + from morpheus_dfp.utils.config_generator import ConfigGenerator config_generator = ConfigGenerator(config=config, dfp_arg_parser=dfp_arg_parser, schema=schema, 
encoding="latin1") diff --git a/tests/llm/conftest.py b/tests/llm/conftest.py index 3519166635..94658863c5 100644 --- a/tests/llm/conftest.py +++ b/tests/llm/conftest.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import types from unittest import mock import pytest @@ -20,6 +21,54 @@ from _utils import require_env_variable +@pytest.fixture(name="nemollm", scope='session', autouse=True) +def nemollm_fixture(nemollm: types.ModuleType): + """ + Fixture to ensure nemollm is installed + """ + yield nemollm + + +@pytest.fixture(name="openai", scope='session', autouse=True) +def openai_fixture(openai: types.ModuleType): + """ + Fixture to ensure openai is installed + """ + yield openai + + +@pytest.fixture(name="langchain", scope='session', autouse=True) +def langchain_fixture(langchain: types.ModuleType): + """ + Fixture to ensure langchain is installed + """ + yield langchain + + +@pytest.fixture(name="langchain_core", scope='session', autouse=True) +def langchain_core_fixture(langchain_core: types.ModuleType): + """ + Fixture to ensure langchain_core is installed + """ + yield langchain_core + + +@pytest.fixture(name="langchain_community", scope='session', autouse=True) +def langchain_community_fixture(langchain_community: types.ModuleType): + """ + Fixture to ensure langchain_community is installed + """ + yield langchain_community + + +@pytest.fixture(name="langchain_nvidia_ai_endpoints", scope='session', autouse=True) +def langchain_nvidia_ai_endpoints_fixture(langchain_nvidia_ai_endpoints: types.ModuleType): + """ + Fixture to ensure langchain_nvidia_ai_endpoints is installed + """ + yield langchain_nvidia_ai_endpoints + + @pytest.fixture(name="countries") def countries_fixture(): yield [ diff --git a/tests/llm/nodes/test_langchain_agent_node.py b/tests/llm/nodes/test_langchain_agent_node.py index 033e978402..0779b11604 100644 --- a/tests/llm/nodes/test_langchain_agent_node.py +++ 
b/tests/llm/nodes/test_langchain_agent_node.py @@ -19,14 +19,6 @@ from unittest import mock import pytest -from langchain.agents import AgentType -from langchain.agents import Tool -from langchain.agents import initialize_agent -from langchain.callbacks.manager import AsyncCallbackManagerForToolRun -from langchain.callbacks.manager import CallbackManagerForToolRun -from langchain_community.chat_models.openai import ChatOpenAI -from langchain_core.exceptions import OutputParserException -from langchain_core.tools import BaseTool from _utils.llm import execute_node from _utils.llm import mk_mock_langchain_tool @@ -34,6 +26,26 @@ from morpheus_llm.llm import LLMNodeBase from morpheus_llm.llm.nodes.langchain_agent_node import LangChainAgentNode +try: + from langchain.agents import AgentType + from langchain.agents import Tool + from langchain.agents import initialize_agent + from langchain.callbacks.manager import AsyncCallbackManagerForToolRun + from langchain.callbacks.manager import CallbackManagerForToolRun + from langchain_community.chat_models.openai import ChatOpenAI + from langchain_core.tools import BaseTool +except ImportError: + pass + + +class OutputParserExceptionStandin(Exception): + """ + Stand-in for the OutputParserException class to avoid importing the actual class from the langchain_core.exceptions. + There is a need to have OutputParserException objects appear in test parameters, but we don't want to import + langchain_core at the top of the test as it is an optional dependency. 
+ """ + pass + def test_constructor(mock_agent_executor: mock.MagicMock): node = LangChainAgentNode(agent_executor=mock_agent_executor) @@ -156,32 +168,6 @@ def test_execute_error(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMoc assert isinstance(execute_node(node, input="input1"), RuntimeError) -class MetadataSaverTool(BaseTool): - # The base class defines *args and **kwargs in the signature for _run and _arun requiring the arguments-differ - # pylint: disable=arguments-differ - name: str = "MetadataSaverTool" - description: str = "useful for when you need to know the name of a reptile" - - saved_metadata: list[dict] = [] - - def _run( - self, - query: str, - run_manager: typing.Optional[CallbackManagerForToolRun] = None, - ) -> str: - raise NotImplementedError("This tool only supports async") - - async def _arun( - self, - query: str, - run_manager: typing.Optional[AsyncCallbackManagerForToolRun] = None, - ) -> str: - assert query is not None # avoiding unused-argument - assert run_manager is not None - self.saved_metadata.append(run_manager.metadata.copy()) - return "frog" - - @pytest.mark.parametrize("metadata", [{ "morpheus": "unittest" @@ -192,6 +178,32 @@ async def _arun( }], ids=["single-metadata", "single-metadata-list", "multiple-metadata-list"]) def test_metadata(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], metadata: dict): + + class MetadataSaverTool(BaseTool): + # The base class defines *args and **kwargs in the signature for _run and _arun requiring the arguments-differ + # pylint: disable=arguments-differ + name: str = "MetadataSaverTool" + description: str = "useful for when you need to know the name of a reptile" + + saved_metadata: list[dict] = [] + + def _run( + self, + query: str, + run_manager: typing.Optional[CallbackManagerForToolRun] = None, + ) -> str: + raise NotImplementedError("This tool only supports async") + + async def _arun( + self, + query: str, + run_manager: typing.Optional[AsyncCallbackManagerForToolRun] 
= None, + ) -> str: + assert query is not None # avoiding unused-argument + assert run_manager is not None + self.saved_metadata.append(run_manager.metadata.copy()) + return "frog" + if isinstance(metadata['morpheus'], list): num_meta = len(metadata['morpheus']) input_data = [f"input_{i}" for i in range(num_meta)] @@ -271,7 +283,7 @@ def mock_llm_chat(*_, messages, **__): "arun_return,replace_value,expected_output", [ ( - [[OutputParserException("Parsing Error"), "A valid result."]], + [[OutputParserExceptionStandin("Parsing Error"), "A valid result."]], "Default error message.", [["Default error message.", "A valid result."]], ), @@ -282,7 +294,7 @@ def mock_llm_chat(*_, messages, **__): ), ( [ - ["A valid result.", OutputParserException("Parsing Error")], + ["A valid result.", OutputParserExceptionStandin("Parsing Error")], [Exception("General error"), "Another valid result."], ], None, @@ -297,6 +309,22 @@ def test_execute_replaces_exceptions( replace_value: str, expected_output: list, ): + # We couldn't import OutputParserException at the module level, so we need to replace instances of + # OutputParserExceptionStandin with OutputParserException + from langchain_core.exceptions import OutputParserException + + arun_return_tmp = [] + for values in arun_return: + values_tmp = [] + for value in values: + if isinstance(value, OutputParserExceptionStandin): + values_tmp.append(OutputParserException(*value.args)) + else: + values_tmp.append(value) + arun_return_tmp.append(values_tmp) + + arun_return = arun_return_tmp + placeholder_input_values = {"foo": "bar"} # a non-empty placeholder input for the context mock_agent_executor.arun.return_value = arun_return diff --git a/tests/llm/services/conftest.py b/tests/llm/services/conftest.py index a802c6ec84..88f30e76ba 100644 --- a/tests/llm/services/conftest.py +++ b/tests/llm/services/conftest.py @@ -36,12 +36,12 @@ def openai_fixture(openai): yield openai -@pytest.fixture(name="nvfoundationllm", autouse=True, 
scope='session') -def nvfoundationllm_fixture(nvfoundationllm): +@pytest.fixture(name="langchain_nvidia_ai_endpoints", autouse=True, scope='session') +def langchain_nvidia_ai_endpoints_fixture(langchain_nvidia_ai_endpoints): """ - All of the tests in this subdir require nvfoundationllm + All of the tests in this subdir require langchain_nvidia_ai_endpoints """ - yield nvfoundationllm + yield langchain_nvidia_ai_endpoints @pytest.fixture(name="mock_chat_completion", autouse=True) diff --git a/tests/llm/services/test_nvfoundation_llm_service.py b/tests/llm/services/test_nvfoundation_llm_service.py index f139ddacde..35a6a66f2b 100644 --- a/tests/llm/services/test_nvfoundation_llm_service.py +++ b/tests/llm/services/test_nvfoundation_llm_service.py @@ -17,13 +17,17 @@ from unittest import mock import pytest -from langchain_core.messages import ChatMessage -from langchain_core.outputs import ChatGeneration -from langchain_core.outputs import LLMResult from morpheus_llm.llm.services.nvfoundation_llm_service import NVFoundationLLMClient from morpheus_llm.llm.services.nvfoundation_llm_service import NVFoundationLLMService +try: + from langchain_core.messages import ChatMessage + from langchain_core.outputs import ChatGeneration + from langchain_core.outputs import LLMResult +except ImportError: + pass + @pytest.fixture(name="set_default_nvidia_api_key", autouse=True, scope="function") def set_default_nvidia_api_key_fixture(): @@ -34,7 +38,7 @@ def set_default_nvidia_api_key_fixture(): @pytest.mark.parametrize("api_key", ["nvapi-12345", None]) @pytest.mark.parametrize("base_url", ["http://test.nvidia.com/v1", None]) -def test_constructor(api_key: str, base_url: bool): +def test_constructor(api_key: str | None, base_url: bool | None): service = NVFoundationLLMService(api_key=api_key, base_url=base_url) diff --git a/tests/llm/test_agents_simple_pipe.py b/tests/llm/test_agents_simple_pipe.py index 61fa7f8d84..5d33dacb03 100644 --- a/tests/llm/test_agents_simple_pipe.py +++ 
b/tests/llm/test_agents_simple_pipe.py @@ -18,12 +18,6 @@ from unittest import mock import pytest -from langchain.agents import AgentType -from langchain.agents import initialize_agent -from langchain.agents import load_tools -from langchain.agents.tools import Tool -from langchain_community.llms import OpenAI # pylint: disable=no-name-in-module -from langchain_community.utilities import serpapi import cudf @@ -41,6 +35,18 @@ from morpheus_llm.llm.task_handlers.simple_task_handler import SimpleTaskHandler from morpheus_llm.stages.llm.llm_engine_stage import LLMEngineStage +try: + from langchain.agents import AgentType + from langchain.agents import initialize_agent + from langchain.agents import load_tools + from langchain.agents.tools import Tool + from langchain.schema import Generation + from langchain.schema import LLMResult + from langchain_community.llms import OpenAI # pylint: disable=no-name-in-module + from langchain_community.utilities import serpapi +except ImportError: + pass + @pytest.fixture(name="questions") def questions_fixture(): @@ -48,7 +54,6 @@ def questions_fixture(): def _build_agent_executor(model_name: str): - llm = OpenAI(model=model_name, temperature=0, cache=False) # Explicitly construct the serpapi tool, loading it via load_tools makes it too difficult to mock @@ -132,9 +137,6 @@ def test_agents_simple_pipe(mock_openai_agenerate: mock.AsyncMock, questions: list[str]): os.environ.update({'OPENAI_API_KEY': 'test_api_key', 'SERPAPI_API_KEY': 'test_api_key'}) - from langchain.schema import Generation - from langchain.schema import LLMResult - assert serpapi.SerpAPIWrapper().aresults is mock_serpapi_aresults model_name = "test_model" diff --git a/tests/utils/test_shared_process_pool.py b/tests/utils/test_shared_process_pool.py index e1d605f4bb..0e5e5a0b82 100644 --- a/tests/utils/test_shared_process_pool.py +++ b/tests/utils/test_shared_process_pool.py @@ -34,14 +34,14 @@ def setup_and_teardown(): pool = SharedProcessPool() - # Since 
SharedProcessPool might be used in other tests, terminate and reset the pool before the test starts - pool.terminate() + # Since SharedProcessPool might be used in other tests, stop and reset the pool before the test starts + pool.stop() pool.join() pool.reset() yield - # Terminate the pool after all tests are done - pool.terminate() + # Stop the pool after all tests are done + pool.stop() pool.join() @@ -93,6 +93,7 @@ def test_singleton(): assert pool_1 is pool_2 +@pytest.mark.slow def test_pool_status(shared_process_pool): pool = shared_process_pool @@ -111,7 +112,7 @@ def test_pool_status(shared_process_pool): assert pool._total_usage == 0.5 _check_pool_stage_settings(pool, "test_stage", 0.5) - pool.terminate() + pool.stop() pool.join() assert pool.status == PoolStatus.SHUTDOWN @@ -125,6 +126,7 @@ def test_pool_status(shared_process_pool): assert not pool._task_queues +@pytest.mark.slow @pytest.mark.parametrize( "a, b, expected", [ @@ -157,6 +159,7 @@ def test_submit_single_task(shared_process_pool, a, b, expected): pool.submit_task("test_stage", _add_task, 10, 20) +@pytest.mark.slow def test_submit_task_with_invalid_stage(shared_process_pool): pool = shared_process_pool @@ -165,6 +168,7 @@ def test_submit_task_with_invalid_stage(shared_process_pool): pool.submit_task("stage_does_not_exist", _add_task, 10, 20) +@pytest.mark.slow def test_submit_task_raises_exception(shared_process_pool): pool = shared_process_pool @@ -175,6 +179,7 @@ def test_submit_task_raises_exception(shared_process_pool): task.result() +@pytest.mark.slow def test_submit_task_with_unserializable_result(shared_process_pool): pool = shared_process_pool @@ -185,6 +190,7 @@ def test_submit_task_with_unserializable_result(shared_process_pool): task.result() +@pytest.mark.slow def test_submit_task_with_unserializable_arg(shared_process_pool): pool = shared_process_pool @@ -195,6 +201,7 @@ def test_submit_task_with_unserializable_arg(shared_process_pool): pool.submit_task("test_stage", 
_arbitrary_function, threading.Lock()) +@pytest.mark.slow @pytest.mark.parametrize( "a, b, expected", [ @@ -220,6 +227,7 @@ def test_submit_multiple_tasks(shared_process_pool, a, b, expected): assert future.result() == expected +@pytest.mark.slow def test_set_usage(shared_process_pool): pool = shared_process_pool @@ -256,6 +264,7 @@ def test_set_usage(shared_process_pool): assert pool._total_usage == 0.9 +@pytest.mark.slow def test_task_completion_with_early_stop(shared_process_pool): pool = shared_process_pool @@ -290,32 +299,3 @@ def test_task_completion_with_early_stop(shared_process_pool): assert len(tasks) == 3 * task_num for task in tasks: assert task.done() - - -def test_terminate_running_tasks(shared_process_pool): - - pool = shared_process_pool - pool.set_usage("test_stage_1", 0.1) - pool.set_usage("test_stage_2", 0.3) - pool.set_usage("test_stage_3", 0.5) - - manager = mp.Manager() - queue = manager.Queue() - - tasks = [] - - task_num = 50 - - for _ in range(task_num): - tasks.append(pool.submit_task("test_stage_1", _blocked_until_signaled_task, queue)) - tasks.append(pool.submit_task("test_stage_2", _blocked_until_signaled_task, queue)) - tasks.append(pool.submit_task("test_stage_3", _blocked_until_signaled_task, queue)) - - for i in range(len(tasks)): - queue.put(i) - - pool.terminate() - pool.join() - - # As pool.terminate() is called, at least some of the tasks are not finished - assert any(not task.done() for task in tasks)