From 5dfcf253cf671e5d6a5e5c94609eecfe7b040f13 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Thu, 17 Oct 2024 16:00:20 +0000 Subject: [PATCH] Tests for the namespace update script Signed-off-by: Anuradha Karuppiah --- scripts/morpheus_namespace_update.py | 4 +- tests/conftest.py | 20 ++++ tests/scripts/conftest.py | 34 ++++++ tests/scripts/data/dfp_old_namespace_data.py | 43 +++++++ .../scripts/data/llm_old_namespace_data.py | 26 ++-- tests/scripts/test_namespace_update.py | 112 ++++++++++++++++++ 6 files changed, 218 insertions(+), 21 deletions(-) create mode 100644 tests/scripts/conftest.py create mode 100644 tests/scripts/data/dfp_old_namespace_data.py rename scripts/data/namespace_update_data.py => tests/scripts/data/llm_old_namespace_data.py (67%) create mode 100644 tests/scripts/test_namespace_update.py diff --git a/scripts/morpheus_namespace_update.py b/scripts/morpheus_namespace_update.py index 2d5056f6a9..96909bf490 100755 --- a/scripts/morpheus_namespace_update.py +++ b/scripts/morpheus_namespace_update.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. ''' -This script is used to update the imports from "dfp." to "morpheus_dfp." in all the files in the given directory. +This script is used to update imports related to DFP and LLM morpheus modules. 
Usage: python morpheus_namespace_update.py --directory --dfp python morpheus_namespace_update.py --directory --llm @@ -129,9 +129,9 @@ def replace_imports(directory, dfp, llm): return for root, _, files in os.walk(directory): - for file in files: file_path = os.path.join(root, file) + # Skip this script if os.path.abspath(file_path) == os.path.abspath(__file__): continue if file.endswith(".py"): diff --git a/tests/conftest.py b/tests/conftest.py index 55c3b03605..4018ef03a7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1064,6 +1064,26 @@ def bert_cased_vocab_fixture(): yield os.path.join(TEST_DIRS.data_dir, 'bert-base-cased-vocab.txt') +@pytest.fixture(name="morpheus_dfp", scope='session') +def morpheus_dfp_fixture(fail_missing: bool): + """ + Fixture to ensure morpheus_dfp is installed + """ + yield import_or_skip("morpheus_dfp", + reason=OPT_DEP_SKIP_REASON.format(package="morpheus_dfp"), + fail_missing=fail_missing) + + +@pytest.fixture(name="morpheus_llm", scope='session') +def morpheus_llm_fixture(fail_missing: bool): + """ + Fixture to ensure morpheus_llm is installed + """ + yield import_or_skip("morpheus_llm", + reason=OPT_DEP_SKIP_REASON.format(package="morpheus_llm"), + fail_missing=fail_missing) + + @pytest.fixture(name="nemollm", scope='session') def nemollm_fixture(fail_missing: bool): """ diff --git a/tests/scripts/conftest.py b/tests/scripts/conftest.py new file mode 100644 index 0000000000..df8aff444a --- /dev/null +++ b/tests/scripts/conftest.py @@ -0,0 +1,34 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import types + +import pytest + + +@pytest.fixture(name="morpheus_llm", scope='session', autouse=True) +def morpheus_llm_fixture(morpheus_llm: types.ModuleType): + """ + Fixture to ensure morpheus_llm is installed + """ + yield morpheus_llm + + +@pytest.fixture(name="morpheus_dfp", scope='session', autouse=True) +def morpheus_dfp_fixture(morpheus_dfp: types.ModuleType): + """ + Fixture to ensure morpheus_dfp is installed + """ + yield morpheus_dfp diff --git a/tests/scripts/data/dfp_old_namespace_data.py b/tests/scripts/data/dfp_old_namespace_data.py new file mode 100644 index 0000000000..e1a46532d3 --- /dev/null +++ b/tests/scripts/data/dfp_old_namespace_data.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +''' +This script is used as test input for morpheus_namespace_update.py script. 
+''' + +# Disable all checkers +# flake8: noqa +# isort: skip_file +# yapf: disable +# pylint: skip-file + +# old DFP import patterns +from dfp.utils.config_generator import ConfigGenerator +from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage # no update +import dfp.stages.dfp_training +import dfp.stages.dfp_inference_stage as inference_stage +import dfp + + +def empty_imports_function_scope(): + ''' + Empty imports from dfp and morpheus_dfp. + ''' + from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from dfp.utils.regex_utils import iso_date_regex + from morpheus_dfp.utils.schema_utils import SchemaBuilder # no update + from dfp.modules import dfp_data_prep diff --git a/scripts/data/namespace_update_data.py b/tests/scripts/data/llm_old_namespace_data.py similarity index 67% rename from scripts/data/namespace_update_data.py rename to tests/scripts/data/llm_old_namespace_data.py index 62f609a11d..958ed67b61 100644 --- a/scripts/data/namespace_update_data.py +++ b/tests/scripts/data/llm_old_namespace_data.py @@ -23,19 +23,8 @@ # yapf: disable # pylint: skip-file -# old DFP import patterns -from dfp.utils.config_generator import ConfigGenerator -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage # no update -import dfp.stages.dfp_training -import dfp.stages.dfp_inference_stage as inference_stage -import dfp - - # old LLM import patterns from morpheus.llm import LLMContext -from morpheus_llm.llm import LLMEngine # no update -from morpheus.stages.llm.llm_engine_stage import LLMEngineStage from morpheus.llm.services.llm_service import LLMService # old vdb import patterns @@ -43,18 +32,17 @@ from morpheus.service import vdb from morpheus.modules.output import write_to_vector_db from 
morpheus.modules.output.write_to_vector_db import preprocess_vdb_resources -from morpheus_llm.service.vdb import milvus_client # no update import morpheus.service.vdb +# These should be skipped +from morpheus.stages.postprocess.filter_detections_stage import FilterDetectionsStage +from morpheus_llm.service.vdb import milvus_client # no update +from morpheus_llm.llm import LLMEngine # no update + def empty_imports_function_scope(): ''' - Empty imports from dfp, llm and vdb. + Empty imports from llm and vdb. ''' - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage - from dfp.utils.regex_utils import iso_date_regex - from morpheus_dfp.utils.schema_utils import SchemaBuilder # no update - from dfp.modules import dfp_data_prep - + from morpheus.stages.llm.llm_engine_stage import LLMEngineStage from morpheus.stages.output.write_to_vector_db_stage import WriteToVectorDBStage import morpheus.modules.schemas.write_to_vector_db_schema diff --git a/tests/scripts/test_namespace_update.py b/tests/scripts/test_namespace_update.py new file mode 100644 index 0000000000..6b505a4e7d --- /dev/null +++ b/tests/scripts/test_namespace_update.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import importlib.util
+import os
+import shutil
+import subprocess
+import sys
+
+import pytest
+
+from _utils import TEST_DIRS
+
+
+@pytest.fixture
+def scripts_data_dir(tmp_path) -> str:
+    '''
+    Copy the data to a temporary directory as we will be modifying the files.
+
+    Returns the path of the copied `data` directory.
+    '''
+    data_dir = os.path.join(TEST_DIRS.tests_dir, "scripts/data")
+    tmp_data_dir = os.path.join(tmp_path, "scripts", "data")
+    # copytree creates the missing parent directory and raises on failure; the exit status of a
+    # shelled-out `cp -r` would have to be checked by hand
+    shutil.copytree(data_dir, tmp_data_dir)
+    return tmp_data_dir
+
+
+@pytest.fixture
+def update_namespace_script() -> str:
+    '''
+    Path to the namespace update script under test.
+    '''
+    return os.path.join(TEST_DIRS.morpheus_root, "scripts/morpheus_namespace_update.py")
+
+
+def import_module_from_path(module_name, path) -> tuple:
+    '''
+    Create the spec and the (not yet executed) module for the python file at `path`.
+
+    Returns `(spec, module)`, or `(None, None)` if no spec with a loader can be created for `path`.
+    The caller runs the imports in the file by calling `spec.loader.exec_module(module)`.
+    '''
+    spec = importlib.util.spec_from_file_location(module_name, path)
+    # the callers execute the module via spec.loader, so a spec without a loader is unusable as well
+    if spec is None or spec.loader is None:
+        return None, None
+
+    return spec, importlib.util.module_from_spec(spec)
+
+
+def _exec_module_from_path(module_name, module_path):
+    '''
+    Execute the python file at `module_path`; any exception raised by its imports is propagated.
+
+    Fails the test if no module can be created for the path.
+    '''
+    spec, module = import_module_from_path(module_name, module_path)
+    assert module is not None, f"Failed to import {module_name} from {module_path}"
+    spec.loader.exec_module(module)
+
+
+def _check_namespace_update(data_dir, script, module_name, namespace_flag):
+    '''
+    Verify that the imports in `module_name` fail before, and work after, running the update script.
+
+    `data_dir` is the writable copy of the test data and `script` is the path of the update script.
+    `namespace_flag` is the command line flag (`--dfp` or `--llm`) selecting the imports to update.
+    '''
+    module_path = os.path.join(data_dir, f'{module_name}.py')
+
+    # old imports are expected to fail before the namespace update, otherwise the input is not set up
+    # with the old imports
+    with pytest.raises(ModuleNotFoundError):
+        _exec_module_from_path(module_name, module_path)
+
+    # update imports to the new namespace by running morpheus_namespace_update.py with the current
+    # interpreter; check=True fails the test if the script exits with a non-zero status
+    subprocess.run([sys.executable, script, "--directory", data_dir, namespace_flag], check=True)
+
+    # verify the updated imports work; a ModuleNotFoundError here means an old import was not updated
+    _exec_module_from_path(module_name, module_path)
+
+
+@pytest.mark.usefixtures("morpheus_dfp")
+def test_dfp_namespace_update(scripts_data_dir, update_namespace_script):
+    '''
+    Update the DFP namespace imports and verify the imports work.
+    '''
+    _check_namespace_update(scripts_data_dir, update_namespace_script, 'dfp_old_namespace_data', '--dfp')
+
+
+@pytest.mark.usefixtures("morpheus_llm")
+def test_llm_namespace_update(scripts_data_dir, update_namespace_script):
+    '''
+    Update the LLM namespace imports and verify the imports work.
+    '''
+    _check_namespace_update(scripts_data_dir, update_namespace_script, 'llm_old_namespace_data', '--llm')