diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index a06495642f..6ed2a6c0a9 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -90,10 +90,13 @@ def run(self, i): (input) (str): converted to env.CM_INPUT (local env) (output) (str): converted to env.CM_OUTPUT (local env) - (name) (str): converted to env.CM_NAME (local env) (extra_cache_tags) (str): converted to env.CM_EXTRA_CACHE_TAGS and used to add to caching (local env) + (name) (str): taken from env.CM_NAME and/or converted to env.CM_NAME (local env) + Added to extra_cache_tags with "name-" prefix. + Useful for python virtual env (to create multiple entries) + (quiet) (bool): if True, set env.CM_QUIET to "yes" and attempt to skip questions (the developers have to support it in pre/post processing and scripts) @@ -285,6 +288,9 @@ def run(self, i): if x!='' and x not in extra_cache_tags: extra_cache_tags.append(x) + if env.get('CM_NAME','')!='': + extra_cache_tags.append('name-'+env['CM_NAME'].strip().lower()) + ############################################################################################################ # Check if we want to skip cache (either by skip_cache or by fake_run) @@ -1338,6 +1344,12 @@ def run(self, i): # Restore original env/state and merge env/state + # This is needed since we want to keep original env/state outside this script + # If we delete env and create a new dict, the original one outside this script will be detached + # That's why we just clean all keys in original env/state (used outside) + # And then copy saved_env (with new_env merged) and saved_state (with new_state merged) + # while getting rid of all temporary updates in env and state inside this script + for k in list(env.keys()): del(env[k]) for k in list(state.keys()): diff --git a/cm-mlops/script/activate-python-venv/README.md b/cm-mlops/script/activate-python-venv/README.md new file mode 100644 index 0000000000..2b61d193cd --- 
/dev/null +++ b/cm-mlops/script/activate-python-venv/README.md @@ -0,0 +1,7 @@ +# About + +Activate python virtual environment installed via CM: + +```bash +cm run script "activate python-venv" (--version={python version}) (--name={user friendly name of the virtual environment}) +``` diff --git a/cm-mlops/script/activate-python-venv/_cm.json b/cm-mlops/script/activate-python-venv/_cm.json index e41cb2b940..627ce3897d 100644 --- a/cm-mlops/script/activate-python-venv/_cm.json +++ b/cm-mlops/script/activate-python-venv/_cm.json @@ -11,6 +11,7 @@ "names": [ "python-venv" ], + "reuse_version": true, "tags": "install,python-venv" } ], diff --git a/cm-mlops/script/activate-python-venv/customize.py b/cm-mlops/script/activate-python-venv/customize.py index e046380201..938a016a05 100644 --- a/cm-mlops/script/activate-python-venv/customize.py +++ b/cm-mlops/script/activate-python-venv/customize.py @@ -15,12 +15,12 @@ def preprocess(i): name = env.get('CM_NAME','') if name != '': - name_tag = name.lower() + name = name.strip().lower() - r = automation.update_deps({'deps':meta['post_deps'], + r = automation.update_deps({'deps':meta['prehook_deps'], 'update_deps':{ 'python-venv':{ - 'extra_cache_tags':name + 'name':name } } }) diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml index a94bd254f7..8c0f6123cf 100644 --- a/cm-mlops/script/app-mlperf-inference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference/_cm.yaml @@ -153,7 +153,8 @@ deps: - tvm-onnx - tvm-pytorch skip_if_env: - CM_TVM_PIP_INSTALL: [ "on" ] + CM_TVM_PIP_INSTALL: + - "on" ######################################################################## @@ -317,6 +318,23 @@ variations: tags: _float32 env: CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_IMPLEMENTATION: reference + + nvidia: + add_deps_recursive: + imagenet-accuracy-script: + tags: _float32 + env: + CM_MLPERF_IMPLEMENTATION: nvidia + deps: + ## Nvidia common code + - tags: get,mlperf,inference,nvidia,common-code + 
- tags: get,mlperf,training,src + - tags: get,generic-python-lib,_nvidia-pyindex + - tags: get,generic-python-lib,_nvidia-tensorrt + - tags: get,generic-python-lib,_numpy + - tags: get,generic-python-lib,_pycuda + - tags: get,generic-python-lib,_mlperf_logging # ML engine onnxruntime: diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 7270427ade..0fbd9f8732 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -197,7 +197,9 @@ def get_run_cmd(env, scenario_extra_options, mode_extra_options, dataset_options return "" def get_run_cmd_nvidia(env, scenario_extra_options, mode_extra_options, dataset_options): - cmd = "" + import pathlib + code_dir=pathlib.Path(__file__).parent.resolve() + cmd = env['CM_PYTHON_BIN_WITH_PATH']+ " " +os.path.join(code_dir, "nvidia", "retinanet.py") + " --pytorch --num_samples=1200 --batch_size=8 --training_repo_path="+env['CM_MLPERF_TRAINING_SOURCE']+" --pyt_ckpt_path="+env['CM_ML_MODEL_FILE_WITH_PATH'] return cmd def get_run_cmd_reference(env, scenario_extra_options, mode_extra_options, dataset_options): diff --git a/cm-mlops/script/get-generic-python-lib/_cm.json b/cm-mlops/script/get-generic-python-lib/_cm.json index e189b4e9c8..2640f2ab77 100644 --- a/cm-mlops/script/get-generic-python-lib/_cm.json +++ b/cm-mlops/script/get-generic-python-lib/_cm.json @@ -219,6 +219,38 @@ "CM_BOTO3_VERSION" ] }, + "nvidia-pyindex": { + "env": { + "CM_PYTHON_PACKAGE_NAME": "nvidia-pyindex" + }, + "new_env_keys": [ + "CM_NVIDIA_PYINDEX_VERSION" + ] + }, + "nvidia-tensorrt": { + "env": { + "CM_PYTHON_PACKAGE_NAME": "nvidia-tensorrt" + }, + "new_env_keys": [ + "CM_NVIDIA_TENSORRT_VERSION" + ] + }, + "pycuda": { + "env": { + "CM_PYTHON_PACKAGE_NAME": "pycuda" + }, + "new_env_keys": [ + "CM_PYCUDA_VERSION" + ] + }, + "mlperf_logging": { + "env": { + "CM_PYTHON_PACKAGE_NAME": "mlperf_logging" + }, + "new_env_keys": [ + 
"CM_MLPERF_LOGGING_VERSION" + ] + }, "wandb": { "env": { "CM_PYTHON_PACKAGE_NAME": "wandb" diff --git a/cm-mlops/script/get-mlperf-inference-nvidia-common-code/customize.py b/cm-mlops/script/get-mlperf-inference-nvidia-common-code/customize.py index f7f4a72472..ff4e073036 100644 --- a/cm-mlops/script/get-mlperf-inference-nvidia-common-code/customize.py +++ b/cm-mlops/script/get-mlperf-inference-nvidia-common-code/customize.py @@ -13,6 +13,6 @@ def preprocess(i): def postprocess(i): env = i['env'] - env['+PYTHONPATH'] = os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], "closed", "NVIDIA", "code", "common") + env['+PYTHONPATH'] = [ os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], "closed", "NVIDIA", "code", "common") ] return {'return':0} diff --git a/cm-mlops/script/get-mlperf-inference-src/README.md b/cm-mlops/script/get-mlperf-inference-src/README.md index d6fc0ad15c..67f0136de7 100644 --- a/cm-mlops/script/get-mlperf-inference-src/README.md +++ b/cm-mlops/script/get-mlperf-inference-src/README.md @@ -12,7 +12,7 @@ where [VARIATION] is one of * `octoml:` Works with the OctoML fork of the MLCommons inference repository. Uses `short-history` variation * `short-history:` Uses a git depth of last 10 commits (significantly reduces the download size) * `full-history:` Uses the full git history -* `recurse-submodules:` Downloads all the submodules +* `no-recurse-submodules:` Only download the main repository [VERSION] is one of * `master:` Uses the master branch diff --git a/cm-mlops/script/get-mlperf-training-src/README.md b/cm-mlops/script/get-mlperf-training-src/README.md new file mode 100644 index 0000000000..65188e922c --- /dev/null +++ b/cm-mlops/script/get-mlperf-training-src/README.md @@ -0,0 +1,27 @@ +# Get MLCommons Training Source +This [CM script](https://github.com/mlcommons/ck/blob/master/cm/docs/tutorial-scripts.md) git clones the [MLCommons Training repository](https://github.com/mlcommons/training). 
+ +## Commands +To install +``` +cm run script --tags=get,mlperf,training,src,[VARIATION] --version=[VERSION] +``` +where [VARIATION] is one of +* `default:` Works with the official MLCommons training repository. Uses `short-history` variation +* `patch:` Applies the `git.patch` to the cloned git repository +* `octoml:` Works with the OctoML fork of the MLCommons inference repository. Uses `short-history` variation +* `short-history:` Uses a git depth of last 5 commits (significantly reduces the download size) +* `full-history:` Uses the full git history +* `no-recurse-submodules:` Only download the main repository + +[VERSION] is one of +* `master:` Uses the master branch +* `r2.1:` Uses the release branch used for MLCommons training 2.1 round + +## Exported Variables +* `CM_MLPERF_TRAINING_SOURCE`: Directory path of the cloned training repository +* `PYTHONPATH`: Is appended with the paths to vision module and the submission tools module + +## Supported and Tested OS +1. Ubuntu 18.04, 20.04, 22.04 +2. 
RHEL 9 diff --git a/cm-mlops/script/get-mlperf-training-src/_cm.json b/cm-mlops/script/get-mlperf-training-src/_cm.json new file mode 100644 index 0000000000..f49bef30c4 --- /dev/null +++ b/cm-mlops/script/get-mlperf-training-src/_cm.json @@ -0,0 +1,93 @@ +{ + "alias": "get-mlperf-training-src", + "automation_alias": "script", + "automation_uid": "5b4e0237da074764", + "cache": true, + "category": "Modular MLPerf benchmarks", + "default_env": { + "CM_GIT_CHECKOUT": "master", + "CM_GIT_DEPTH": "--depth 4", + "CM_GIT_PATCH": "no", + "CM_GIT_RECURSE_SUBMODULES": " --recurse-submodules", + "CM_GIT_URL": "https://github.com/mlcommons/training.git" + }, + "default_variation": "default", + "default_version": "master", + "deps": [ + { + "tags": "detect,os" + }, + { + "names": [ + "python", + "python3" + ], + "tags": "get,python3" + } + ], + "new_env_keys": [ + "CM_MLPERF_TRAINING_*", + "CM_MLPERF_TRAINING_LAST_RELEASE", + "+PYTHONPATH" + ], + "tags": [ + "get", + "src", + "source", + "training", + "training-src", + "training-source", + "mlperf", + "mlcommons" + ], + "uid": "dc440bd88e794a28", + "variations": { + "default": { + "base": [ + "short-history" + ], + "env": { + "CM_GIT_PATCH": "no" + } + }, + "full-history": { + "env": { + "CM_GIT_DEPTH": "" + } + }, + "no-recurse-submodules": { + "env": { + "CM_GIT_RECURSE_SUBMODULES": "" + } + }, + "patch": { + "env": { + "CM_GIT_PATCH": "yes" + } + }, + "short-history": { + "env": { + "CM_GIT_DEPTH": "--depth 5" + } + } + }, + "versions": { + "custom": { + "env": { + "CM_MLPERF_LAST_RELEASE": "v2.1" + } + }, + "master": { + "env": { + "CM_GIT_CHECKOUT": "master", + "CM_MLPERF_LAST_RELEASE": "v2.1" + } + }, + "r2.1": { + "env": { + "CM_GIT_CHECKOUT": "r2.1", + "CM_MLPERF_LAST_RELEASE": "v2.1" + } + } + } +} diff --git a/cm-mlops/script/get-mlperf-training-src/customize.py b/cm-mlops/script/get-mlperf-training-src/customize.py new file mode 100644 index 0000000000..570a283ccc --- /dev/null +++ 
b/cm-mlops/script/get-mlperf-training-src/customize.py @@ -0,0 +1,41 @@ +from cmind import utils +import os +import shutil + +def preprocess(i): + + os_info = i['os_info'] + + if os_info['platform'] == 'windows': + return {'return':1, 'error': 'Windows is not supported in this script yet'} + + env = i['env'] + meta = i['meta'] + + if 'CM_GIT_DEPTH' not in env: + env['CM_GIT_DEPTH'] = '' + + if 'CM_GIT_RECURSE_SUBMODULES' not in env: + env['CM_GIT_RECURSE_SUBMODULES'] = '' + + need_version = env.get('CM_VERSION','') + versions = meta['versions'] + + if need_version!='' and not need_version in versions: + env['CM_GIT_CHECKOUT'] = need_version + + return {'return':0} + + +def postprocess(i): + + env = i['env'] + state = i['state'] + + env['CM_MLPERF_TRAINING_SOURCE'] = os.path.join(os.getcwd(), 'training') + +# 20221024: we save and restore env in the main script and can clean env here for determinism +# if '+PYTHONPATH' not in env: env['+PYTHONPATH'] = [] + env['+PYTHONPATH']=[] + + return {'return':0} diff --git a/cm-mlops/script/get-mlperf-training-src/run.sh b/cm-mlops/script/get-mlperf-training-src/run.sh new file mode 100644 index 0000000000..8e6667c570 --- /dev/null +++ b/cm-mlops/script/get-mlperf-training-src/run.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +CUR_DIR=$PWD +SCRIPT_DIR=${CM_TMP_CURRENT_SCRIPT_PATH} + +echo "******************************************************" +echo "Cloning Mlcommons from ${CM_GIT_URL} with branch ${CM_GIT_CHECKOUT} ${CM_GIT_DEPTH} ${CM_GIT_RECURSE_SUBMODULES}..." + +if [ ! 
-d "training" ]; then + if [ -z ${CM_GIT_SHA} ]; then + git clone ${CM_GIT_RECURSE_SUBMODULES} -b "${CM_GIT_CHECKOUT}" ${CM_GIT_URL} ${CM_GIT_DEPTH} training + cd training + else + git clone ${CM_GIT_RECURSE_SUBMODULES} ${CM_GIT_URL} ${CM_GIT_DEPTH} training + cd training + git checkout -b "${CM_GIT_CHECKOUT}" + fi + if [ "${?}" != "0" ]; then exit 1; fi +fi + +if [ ${CM_GIT_PATCH} == "yes" ]; then + patch_filename=${CM_GIT_PATCH_FILENAME:-git.patch} + echo "Applying patch ${SCRIPT_DIR}/patch/$patch_filename" + git apply ${SCRIPT_DIR}/patch/"$patch_filename" + if [ "${?}" != "0" ]; then exit 1; fi +fi +cd "$CUR_DIR" diff --git a/cm-mlops/script/install-cuda-prebuilt/customize.py b/cm-mlops/script/install-cuda-prebuilt/customize.py index f872e9d2fa..4294aa2b83 100644 --- a/cm-mlops/script/install-cuda-prebuilt/customize.py +++ b/cm-mlops/script/install-cuda-prebuilt/customize.py @@ -8,6 +8,9 @@ def preprocess(i): env = i['env'] automation = i['automation'] + version = env.get('CM_VERSION') + if version not in env.get('CM_CUDA_LINUX_FILENAME', ''): + return {'return': 1, 'error': "Only CUDA versions 11.7.0 and 11.8.0 are supported now!"} recursion_spaces = i['recursion_spaces'] nvcc_bin = "nvcc" diff --git a/cm-mlops/script/install-generic-python-lib/_cm.json b/cm-mlops/script/install-generic-python-lib/_cm.json index fbf0b0f7fa..9a47f5a0df 100644 --- a/cm-mlops/script/install-generic-python-lib/_cm.json +++ b/cm-mlops/script/install-generic-python-lib/_cm.json @@ -155,6 +155,33 @@ "CM_PYTHON_PACKAGE_NAME": "boto3" } }, + "nvidia-pyindex": { + "env": { + "CM_PYTHON_PACKAGE_NAME": "nvidia-pyindex" + } + }, + "nvidia-tensorrt": { + "env": { + "CM_PYTHON_PACKAGE_NAME": "nvidia-tensorrt" + } + }, + "mlperf_logging": { + "env": { + "CM_PYTHON_PACKAGE_NAME": "mlperf_logging", + "CM_PIP_URL": "git+https://github.com/mlperf/logging.git@2.1.0" + } + }, + "pycuda": { + "env": { + "CM_PYTHON_PACKAGE_NAME": "pycuda" + }, + "deps": [ + { + "names": [ "cuda" ], + "tags": 
"get,cuda" + } + ] + }, "wandb": { "env": { "CM_PYTHON_PACKAGE_NAME": "wandb" diff --git a/cm-mlops/script/install-python-venv/customize.py b/cm-mlops/script/install-python-venv/customize.py index 43d8acf5eb..939b33e49c 100644 --- a/cm-mlops/script/install-python-venv/customize.py +++ b/cm-mlops/script/install-python-venv/customize.py @@ -36,7 +36,7 @@ def preprocess(i): if x != '': name = x if name != '': - name_tag = 'venv-'+name.lower() + name_tag = 'name-'+name.strip().lower() add_extra_cache_tags.append(name_tag) add_python_extra_cache_tags.append(name_tag) diff --git a/cm/CHANGES.md b/cm/CHANGES.md index 47c1399f68..5be3242b8b 100644 --- a/cm/CHANGES.md +++ b/cm/CHANGES.md @@ -1,3 +1,8 @@ +## V1.0.5.1 + - added --checkout and --branch to "cm pull repo' for more determinism and reproducibility + - detect if repository and its forks already exist during "cm pull repo" (#397) + - support = inside argument of a key (--key="x=y") (#453) + ## V1.0.5 - redesigned documentation - added support utils to generate lists of all automations and scripts diff --git a/cm/cmind/__init__.py b/cm/cmind/__init__.py index 19c44a2cbe..04a02191db 100644 --- a/cm/cmind/__init__.py +++ b/cm/cmind/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.0.5" +__version__ = "1.0.5.1" from cmind.core import access from cmind.core import error diff --git a/cm/cmind/cli.py b/cm/cmind/cli.py index 650a769a9a..7a0c777d83 100644 --- a/cm/cmind/cli.py +++ b/cm/cmind/cli.py @@ -110,9 +110,10 @@ def parse(cmd): artifacts.append(a) else: # flags - if '=' in a: - key,value = a.split('=') - value=value.strip() + j = a.find('=') # find first = + if j>0: + key = a[:j].strip() + value = a[j+1:].strip() else: key=a value=True diff --git a/cm/cmind/repos.py b/cm/cmind/repos.py index 4493118bee..9cf625fb24 100644 --- a/cm/cmind/repos.py +++ b/cm/cmind/repos.py @@ -166,8 +166,44 @@ def process(self, repo_path, mode='add'): if mode == 'add': if repo_path not in paths: + if len(paths)>0: + # Load meta of the current 
repo + path_to_repo_desc = os.path.join(repo_path, self.cfg['file_meta_repo']) + r=utils.load_yaml_and_json(file_name_without_ext=path_to_repo_desc) + if r['return']>0: return r + + meta = r['meta'] + + alias = meta.get('alias', '') + uid = meta.get('uid', '') + + # Check that no repos exist with the same alias and/or uid + # (to avoid adding forks and original repos) + + for path in paths: + path_to_existing_repo_desc = os.path.join(path, self.cfg['file_meta_repo']) + r=utils.load_yaml_and_json(file_name_without_ext=path_to_existing_repo_desc) + if r['return']>0: return r + + existing_meta = r['meta'] + + existing_alias = existing_meta.get('alias', '') + existing_uid = existing_meta.get('uid', '') + + exist = False + if alias != '' and existing_alias !='' and alias == existing_alias: + exist = True + + if not exist and uid !='' and existing_uid !='' and uid == existing_uid: + exist = True + + if exist: + return {'return':1, 'error':'CM repository with the same alias "{}" and/or uid "{}" already exists in {}'.format(alias, uid, path)} + + paths.append(repo_path) modified = True + elif mode == 'delete': new_paths = [] for p in paths: @@ -237,10 +273,32 @@ def pull(self, alias, url = '', branch = '', checkout = '', console = False, des print (cmd) print ('') - os.system(cmd) + r = os.system(cmd) - if console: - print ('') + if clone and not os.path.isdir(path_to_repo): + return {'return':1, 'error':'repository was not cloned'} + + os.chdir(path_to_repo) + + # Check if branch + if branch != '' or checkout != '': + cmd = 'git checkout' + + if branch != '': + cmd += ' -b ' + branch + + if checkout!='': + cmd += ' ' + checkout + + if console: + print ('') + print (cmd) + print ('') + + r = os.system(cmd) + + if r>0: + return {'return':1, 'error':'git checkout for repository failed'} # Check if repo description exists path_to_repo_desc = os.path.join(path_to_repo, self.cfg['file_meta_repo']) @@ -297,7 +355,7 @@ def pull(self, alias, url = '', branch = '', checkout = '', 
console = False, des r=utils.save_yaml(path_to_repo_desc + '.yaml', meta=meta) if r['return']>0: return r - # Check paht to repo with prefix + # Check path to repo with prefix path_to_repo_with_prefix = path_to_repo if prefix!='': @@ -306,6 +364,9 @@ def pull(self, alias, url = '', branch = '', checkout = '', console = False, des if not os.path.isdir(path_to_repo_with_prefix): os.makedirs(path_to_repo_with_prefix) + # Get final alias + alias = meta.get('alias', '') + # Update repo list # TBD: make it more safe (reload and save) r = self.process(path_to_repo, 'add') @@ -314,6 +375,10 @@ def pull(self, alias, url = '', branch = '', checkout = '', console = False, des # Go back to original directory os.chdir(cur_dir) + if console: + print ('') + print ('CM alias for this repository: {}'.format(alias)) + return {'return':0, 'meta':meta} ############################################################ diff --git a/tests/script/test_features.py b/tests/script/test_features.py index 5b6380eca5..c519facb8c 100644 --- a/tests/script/test_features.py +++ b/tests/script/test_features.py @@ -3,7 +3,8 @@ import cmind as cm import check as checks -r = cm.access({'action':'run', 'automation':'script', 'tags': 'install,python-venv', 'env': {'CM_NAME': 'test'}, 'quiet': 'yes'}) +r = cm.access({'action':'run', 'automation':'script', 'tags': 'install,python-venv', 'name':'test', 'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'get,venv-test'}) + +r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'get,python,virtual,name-test'}) checks.check_list(r, "get,python-venv")