From 925638597fd1a81003a5d73936924df4ebd48c39 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 15 Oct 2024 17:51:52 +0000 Subject: [PATCH 1/3] Script for updating the namespace due to compartmentalization changes python morpheus_namespace_update.py --directory --dfp python morpheus_namespace_update.py --directory --llm Signed-off-by: Anuradha Karuppiah --- scripts/data/test_namespace_update.py | 58 +++++++++++ scripts/morpheus_namespace_update.py | 145 ++++++++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 scripts/data/test_namespace_update.py create mode 100755 scripts/morpheus_namespace_update.py diff --git a/scripts/data/test_namespace_update.py b/scripts/data/test_namespace_update.py new file mode 100644 index 0000000000..7e16108a81 --- /dev/null +++ b/scripts/data/test_namespace_update.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +''' +This script is used as test input for morpheus_namespace_update.py script. 
+''' + +# Disable all checkers +# flake8: noqa +# isort: skip_file + +# old DFP import patterns +from dfp.utils.config_generator import ConfigGenerator +from dfp.stages.dfp_rolling_window_stage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage # no update +import dfp.stages.dfp_training +import dfp.stages.dfp_inference_stage as inference_stage +import dfp + + +# old LLM import patterns +from morpheus.llm import LLMContext +from morpheus_llm.llm import LLMEngine # no update +from morpheus.stages.llm.llm_engine_stage import LLMEngineStage +from morpheus.llm.services.llm_service import LLMService + +# old vdb import patterns +from morpheus.service.vdb import faiss_vdb_service +from morpheus.service import vdb +from morpheus.modules.output import write_to_vector_db +from morpheus.modules.output.write_to_vector_db import preprocess_vdb_resources +from morpheus_llm.service.vdb import milvus_client # no update +import morpheus.service.vdb + +def empty_imports_function_scope(): + ''' + Empty imports from dfp, llm and vdb. + ''' + from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from dfp.utils.regex_utils import iso_date_regex + from morpheus_dfp.utils.schema_utils import SchemaBuilder # no update + from dfp.modules import dfp_data_prep + + from morpheus.stages.output.write_to_vector_db_stage import WriteToVectorDBStage + import morpheus.modules.schemas.write_to_vector_db_schema diff --git a/scripts/morpheus_namespace_update.py b/scripts/morpheus_namespace_update.py new file mode 100755 index 0000000000..2d5056f6a9 --- /dev/null +++ b/scripts/morpheus_namespace_update.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
def replace_imports_in_file(file_path, old_module, new_module):
    '''
    Rewrite imports of ``old_module`` in ``file_path`` so that they use ``new_module``.

    Two statement forms are rewritten, at any indentation level:

    - ``import old_module[.sub.module] [as alias]``
    - ``from old_module[.sub.module] import name``

    Only the leading top-level package name is replaced. Modules whose name merely starts with ``old_module``
    (for example ``dfp_other`` when ``old_module`` is ``dfp``) and statements that import a name called
    ``old_module`` from another package (``from pkg import dfp``) are left untouched.

    Parameters
    ----------
    file_path : str
        Path of the UTF-8 encoded file to update in place.
    old_module : str
        Top-level module name to replace, e.g. ``"dfp"``. It is matched literally.
    new_module : str
        Replacement top-level module name, e.g. ``"morpheus_dfp"``. It is inserted literally.

    Notes
    -----
    The file is written back only when at least one import was rewritten, and its line endings are preserved.
    '''
    # newline="" turns off newline translation so that the original line endings survive the round trip.
    with open(file_path, "r", encoding="utf-8", newline="") as f:
        content = f.read()

    # Anchor at the start of a line (after optional indentation) so that only the module named right after
    # the `import` / `from` keyword is considered. The negative lookahead stops the match from ending in the
    # middle of a longer identifier, while still allowing ".", whitespace or end-of-file to follow.
    pattern = re.compile(rf"^([ \t]*(?:import|from)[ \t]+){re.escape(old_module)}(?!\w)", re.MULTILINE)

    # A callable replacement keeps any backslash in new_module from being interpreted as a regex escape.
    content, num_rewritten = pattern.subn(lambda match: match.group(1) + new_module, content)

    if num_rewritten:
        with open(file_path, "w", encoding="utf-8", newline="") as f:
            f.write(content)
def replace_llm_imports_in_file(file_path):
    '''
    Rewrite old ``morpheus`` LLM and vector DB imports in ``file_path`` to the ``morpheus_llm`` namespace.

    The following styles are rewritten wherever they occur in the file:

    - ``import morpheus.llm...``                              -> ``import morpheus_llm.llm...``
    - ``from morpheus.llm... import ...``                     -> ``from morpheus_llm.llm... import ...``
    - ``from morpheus.<pkg>.llm... import ...``               -> ``from morpheus_llm.<pkg>.llm... import ...``
    - ``from morpheus.<pkg>.vdb... import ...``               -> ``from morpheus_llm.<pkg>.vdb... import ...``
    - ``from morpheus.service import vdb``                    -> ``from morpheus_llm.service import vdb``
    - ``import morpheus.service.vdb``                         -> ``import morpheus_llm.service.vdb``
    - ``from morpheus.<path>.write_to_vector_db... import``   -> ``from morpheus_llm.<path>.write_to_vector_db...``
    - ``from morpheus.<path> import write_to_vector_db...``   -> ``from morpheus_llm.<path> import ...``
    - ``import morpheus.<path>.write_to_vector_db...``        -> ``import morpheus_llm.<path>.write_to_vector_db...``

    Imports that already use ``morpheus_llm`` and unrelated ``morpheus`` imports are left untouched.

    Parameters
    ----------
    file_path : str
        Path of the UTF-8 encoded file to update in place.

    Notes
    -----
    The file is written back only when at least one import was rewritten, and its line endings are preserved.
    '''
    # Ordered (pattern, replacement) pairs. Every literal "." is escaped so that it cannot match an arbitrary
    # character, and each pattern requires "morpheus." so already-migrated "morpheus_llm." imports never match.
    rewrite_rules = (
        # The "llm" package keeps its name under the new top-level package, for both import forms.
        (r"import morpheus\.llm", r"import morpheus_llm.llm"),
        (r"from morpheus\.llm", r"from morpheus_llm.llm"),
        # e.g. "from morpheus.stages.llm.llm_engine_stage import ..."
        (r"from morpheus\.(\w+\.llm)", r"from morpheus_llm.\1"),
        # e.g. "from morpheus.service.vdb import faiss_vdb_service"
        (r"from morpheus\.(\w+\.vdb)", r"from morpheus_llm.\1"),
        (r"from morpheus\.service import vdb", r"from morpheus_llm.service import vdb"),
        (r"import morpheus\.service\.vdb", r"import morpheus_llm.service.vdb"),
        # "write_to_vector_db" is matched as a prefix on purpose so that modules such as
        # "write_to_vector_db_stage" and "write_to_vector_db_schema" are covered as well.
        (r"from morpheus\.(\S+\.write_to_vector_db)", r"from morpheus_llm.\1"),
        (r"from morpheus\.(\S+ import write_to_vector_db)", r"from morpheus_llm.\1"),
        (r"import morpheus\.(\S+\.write_to_vector_db)", r"import morpheus_llm.\1"),
    )

    # newline="" turns off newline translation so that the original line endings survive the round trip.
    with open(file_path, "r", encoding="utf-8", newline="") as f:
        content = f.read()

    num_rewritten = 0
    for pattern, replacement in rewrite_rules:
        content, count = re.subn(pattern, replacement, content)
        num_rewritten += count

    if num_rewritten:
        with open(file_path, "w", encoding="utf-8", newline="") as f:
            f.write(content)
@click.command()
@click.option('--directory', default='./', help='directory for updating')
@click.option('--dfp', is_flag=True, help='Replace dfp imports')
@click.option('--llm', is_flag=True, help='Replace llm and vdb imports')
def replace_imports(directory, dfp, llm):
    '''
    Walk files in the given directory and replace imports.
    '''
    # NOTE: the docstring above doubles as the command's --help text, so it is kept short and user facing.
    #
    # directory: root of the tree to walk; every "*.py" file below it is a candidate.
    # dfp:       when set, rewrite "dfp" imports to "morpheus_dfp".
    # llm:       when set, rewrite the llm / vdb imports to "morpheus_llm".
    if not (dfp or llm):
        print("Please provide either --dfp or --llm")
        return

    this_script = os.path.abspath(__file__)

    for dir_path, _, file_names in os.walk(directory):
        for file_name in file_names:
            if not file_name.endswith(".py"):
                continue

            candidate = os.path.join(dir_path, file_name)

            # Never rewrite this script itself: its own patterns contain the old import strings.
            if os.path.abspath(candidate) == this_script:
                continue

            if dfp:
                replace_imports_in_file(candidate, 'dfp', 'morpheus_dfp')
            if llm:
                replace_llm_imports_in_file(candidate)


if __name__ == "__main__":
    replace_imports()
DFPRollingWindowStage from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage # no update import dfp.stages.dfp_training import dfp.stages.dfp_inference_stage as inference_stage