Merge from OctoML (#515)

mlcommons · Nov 15, 2022 · fe95527 · fe95527
2 parents 692baaf + 9c06b30
commit fe95527
Show file tree

Hide file tree

Showing 21 changed files with 381 additions and 19 deletions.
diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py
@@ -90,10 +90,13 @@ def run(self, i):
 
           (input) (str): converted to env.CM_INPUT  (local env)
           (output) (str): converted to env.CM_OUTPUT (local env)
-          (name) (str): converted to env.CM_NAME (local env)
 
           (extra_cache_tags) (str): converted to env.CM_EXTRA_CACHE_TAGS and used to add to caching (local env)
 
+          (name) (str): taken from env.CM_NAME and/or converted to env.CM_NAME (local env)
+                        Added to extra_cache_tags with "name-" prefix.
+                        Useful for python virtual env (to create multiple entries)
+
           (quiet) (bool): if True, set env.CM_QUIET to "yes" and attempt to skip questions
                           (the developers have to support it in pre/post processing and scripts)
 
@@ -285,6 +288,9 @@ def run(self, i):
                 if x!='' and x not in extra_cache_tags:
                     extra_cache_tags.append(x)
 
+        if env.get('CM_NAME','')!='':
+            extra_cache_tags.append('name-'+env['CM_NAME'].strip().lower())
+
 
         ############################################################################################################
         # Check if we want to skip cache (either by skip_cache or by fake_run)
@@ -1338,6 +1344,12 @@ def run(self, i):
 
 
         # Restore original env/state and merge env/state
+        # This is needed since we want to keep the original env/state objects used outside this script.
+        # If we deleted env and created a new dict, the original one outside this script would be detached.
+        # That's why we just remove all keys from the original env/state (used outside),
+        # and then copy in saved_env (with new_env merged) and saved_state (with new_state merged),
+        # while getting rid of all temporary updates made to env and state inside this script.
+
         for k in list(env.keys()):
             del(env[k])
         for k in list(state.keys()):

diff --git a/cm-mlops/script/activate-python-venv/README.md b/cm-mlops/script/activate-python-venv/README.md
@@ -0,0 +1,7 @@
+# About
+
+Activate python virtual environment installed via CM:
+
+```bash
+cm run script "activate python-venv" (--version={python version}) (--name={user friendly name of the virtual environment})
+```
diff --git a/cm-mlops/script/activate-python-venv/_cm.json b/cm-mlops/script/activate-python-venv/_cm.json
@@ -11,6 +11,7 @@
       "names": [
         "python-venv"
       ],
+      "reuse_version": true,
       "tags": "install,python-venv"
     }
   ],

diff --git a/cm-mlops/script/activate-python-venv/customize.py b/cm-mlops/script/activate-python-venv/customize.py
@@ -15,12 +15,12 @@ def preprocess(i):
 
     name = env.get('CM_NAME','')
     if name != '':
-        name_tag = name.lower()
+        name = name.strip().lower()
 
-        r = automation.update_deps({'deps':meta['post_deps'],
+        r = automation.update_deps({'deps':meta['prehook_deps'],
                                     'update_deps':{
                                       'python-venv':{
-                                        'extra_cache_tags':name
+                                        'name':name
                                         }
                                       }
                                    })

diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml
@@ -153,7 +153,8 @@ deps:
       - tvm-onnx
       - tvm-pytorch
     skip_if_env:
-      CM_TVM_PIP_INSTALL: [ "on" ]
+      CM_TVM_PIP_INSTALL:
+      - "on"
 
 
   ########################################################################
@@ -317,6 +318,23 @@ variations:
         tags: _float32
     env:
       CM_MLPERF_PYTHON: 'yes'
+      CM_MLPERF_IMPLEMENTATION: reference
+
+  nvidia:
+    add_deps_recursive:
+      imagenet-accuracy-script:
+        tags: _float32
+    env:
+      CM_MLPERF_IMPLEMENTATION: nvidia
+    deps:
+      ## Nvidia common code
+      - tags: get,mlperf,inference,nvidia,common-code
+      - tags: get,mlperf,training,src
+      - tags: get,generic-python-lib,_nvidia-pyindex
+      - tags: get,generic-python-lib,_nvidia-tensorrt
+      - tags: get,generic-python-lib,_numpy
+      - tags: get,generic-python-lib,_pycuda
+      - tags: get,generic-python-lib,_mlperf_logging
 
   # ML engine
   onnxruntime:

diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py
@@ -197,7 +197,9 @@ def get_run_cmd(env, scenario_extra_options, mode_extra_options, dataset_options
     return ""
 
 def get_run_cmd_nvidia(env, scenario_extra_options, mode_extra_options, dataset_options):
-    cmd = ""
+    import pathlib
+    code_dir=pathlib.Path(__file__).parent.resolve()
+    cmd = env['CM_PYTHON_BIN_WITH_PATH']+ " " +os.path.join(code_dir, "nvidia", "retinanet.py") + " --pytorch --num_samples=1200 --batch_size=8 --training_repo_path="+env['CM_MLPERF_TRAINING_SOURCE']+" --pyt_ckpt_path="+env['CM_ML_MODEL_FILE_WITH_PATH']
     return cmd
 
 def get_run_cmd_reference(env, scenario_extra_options, mode_extra_options, dataset_options):

diff --git a/cm-mlops/script/get-generic-python-lib/_cm.json b/cm-mlops/script/get-generic-python-lib/_cm.json
@@ -219,6 +219,38 @@
         "CM_BOTO3_VERSION"
       ]
     },
+    "nvidia-pyindex": {
+      "env": {
+        "CM_PYTHON_PACKAGE_NAME": "nvidia-pyindex"
+      },
+      "new_env_keys": [
+        "CM_NVIDIA_PYINDEX_VERSION"
+      ]
+    },
+    "nvidia-tensorrt": {
+      "env": {
+        "CM_PYTHON_PACKAGE_NAME": "nvidia-tensorrt"
+      },
+      "new_env_keys": [
+        "CM_NVIDIA_TENSORRT_VERSION"
+      ]
+    },
+    "pycuda": {
+      "env": {
+        "CM_PYTHON_PACKAGE_NAME": "pycuda"
+      },
+      "new_env_keys": [
+        "CM_PYCUDA_VERSION"
+      ]
+    },
+    "mlperf_logging": {
+      "env": {
+        "CM_PYTHON_PACKAGE_NAME": "mlperf_logging"
+      },
+      "new_env_keys": [
+        "CM_MLPERF_LOGGING_VERSION"
+      ]
+    },
     "wandb": {
       "env": {
         "CM_PYTHON_PACKAGE_NAME": "wandb"

diff --git a/cm-mlops/script/get-mlperf-inference-nvidia-common-code/customize.py b/cm-mlops/script/get-mlperf-inference-nvidia-common-code/customize.py
@@ -13,6 +13,6 @@ def preprocess(i):
 def postprocess(i):
     env = i['env']
 
-    env['+PYTHONPATH'] = os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], "closed", "NVIDIA", "code", "common")
+    env['+PYTHONPATH'] = [ os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], "closed", "NVIDIA", "code", "common") ]
 
     return {'return':0}
diff --git a/cm-mlops/script/get-mlperf-inference-src/README.md b/cm-mlops/script/get-mlperf-inference-src/README.md
@@ -12,7 +12,7 @@ where [VARIATION] is one of
 * `octoml:` Works with the OctoML fork of the MLCommons inference repository. Uses `short-history` variation
 * `short-history:` Uses a git depth of last 10 commits (significantly reduces the download size)
 * `full-history:` Uses the full git history
-* `recurse-submodules:` Downloads all the submodules
+* `no-recurse-submodules:` Only download the main repository
 
 [VERSION] is one of
 * `master:` Uses the master branch 

diff --git a/cm-mlops/script/get-mlperf-training-src/README.md b/cm-mlops/script/get-mlperf-training-src/README.md
@@ -0,0 +1,27 @@
+# Get MLCommons Training Source
+This [CM script](https://github.com/mlcommons/ck/blob/master/cm/docs/tutorial-scripts.md) git clones the [MLCommons Training repository](https://github.com/mlcommons/training).
+
+## Commands
+To install
+```
+cm run script --tags=get,mlperf,training,src,[VARIATION] --version=[VERSION] 
+```
+where [VARIATION] is one of
+* `default:` Works with the official MLCommons training repository. Uses `short-history` variation
+* `patch:` Applies the `git.patch` to the cloned git repository
+* `octoml:` Works with the OctoML fork of the MLCommons inference repository. Uses `short-history` variation
+* `short-history:` Uses a git depth of last 5 commits (significantly reduces the download size)
+* `full-history:` Uses the full git history
+* `no-recurse-submodules:` Only download the main repository
+
+[VERSION] is one of
+* `master:` Uses the master branch 
+* `r2.1:`  Uses the release branch used for MLCommons training 2.1 round
+
+## Exported Variables
+* `CM_MLPERF_TRAINING_SOURCE`: Directory path of the cloned training repository
+* `PYTHONPATH`: Is appended with the paths to vision module and the submission tools module
+
+## Supported and Tested OS
+1. Ubuntu 18.04, 20.04, 22.04
+2. RHEL 9
diff --git a/cm-mlops/script/get-mlperf-training-src/_cm.json b/cm-mlops/script/get-mlperf-training-src/_cm.json
@@ -0,0 +1,93 @@
+{
+  "alias": "get-mlperf-training-src",
+  "automation_alias": "script",
+  "automation_uid": "5b4e0237da074764",
+  "cache": true,
+  "category": "Modular MLPerf benchmarks",
+  "default_env": {
+    "CM_GIT_CHECKOUT": "master",
+    "CM_GIT_DEPTH": "--depth 4",
+    "CM_GIT_PATCH": "no",
+    "CM_GIT_RECURSE_SUBMODULES": " --recurse-submodules",
+    "CM_GIT_URL": "https://github.com/mlcommons/training.git"
+  },
+  "default_variation": "default",
+  "default_version": "master",
+  "deps": [
+    {
+      "tags": "detect,os"
+    },
+    {
+      "names": [
+        "python",
+        "python3"
+      ],
+      "tags": "get,python3"
+    }
+  ],
+  "new_env_keys": [
+    "CM_MLPERF_TRAINING_*",
+    "CM_MLPERF_TRAINING_LAST_RELEASE",
+    "+PYTHONPATH"
+  ],
+  "tags": [
+    "get",
+    "src",
+    "source",
+    "training",
+    "training-src",
+    "training-source",
+    "mlperf",
+    "mlcommons"
+  ],
+  "uid": "dc440bd88e794a28",
+  "variations": {
+    "default": {
+      "base": [
+        "short-history"
+      ],
+      "env": {
+        "CM_GIT_PATCH": "no"
+      }
+    },
+    "full-history": {
+      "env": {
+        "CM_GIT_DEPTH": ""
+      }
+    },
+    "no-recurse-submodules": {
+      "env": {
+        "CM_GIT_RECURSE_SUBMODULES": ""
+      }
+    },
+    "patch": {
+      "env": {
+        "CM_GIT_PATCH": "yes"
+      }
+    },
+    "short-history": {
+      "env": {
+        "CM_GIT_DEPTH": "--depth 5"
+      }
+    }
+  },
+  "versions": {
+    "custom": {
+      "env": {
+        "CM_MLPERF_LAST_RELEASE": "v2.1"
+      }
+    },
+    "master": {
+      "env": {
+        "CM_GIT_CHECKOUT": "master",
+        "CM_MLPERF_LAST_RELEASE": "v2.1"
+      }
+    },
+    "r2.1": {
+      "env": {
+        "CM_GIT_CHECKOUT": "r2.1",
+        "CM_MLPERF_LAST_RELEASE": "v2.1"
+      }
+    }
+  }
+}
diff --git a/cm-mlops/script/get-mlperf-training-src/customize.py b/cm-mlops/script/get-mlperf-training-src/customize.py
@@ -0,0 +1,41 @@
+from cmind import utils
+import os
+import shutil
+
+def preprocess(i):
+
+    os_info = i['os_info']
+
+    if os_info['platform'] == 'windows':
+        return {'return':1, 'error': 'Windows is not supported in this script yet'}
+
+    env = i['env']
+    meta = i['meta']
+
+    if 'CM_GIT_DEPTH' not in env:
+        env['CM_GIT_DEPTH'] = ''
+
+    if 'CM_GIT_RECURSE_SUBMODULES' not in env:
+        env['CM_GIT_RECURSE_SUBMODULES'] = ''
+
+    need_version = env.get('CM_VERSION','')
+    versions = meta['versions']
+
+    if need_version!='' and not need_version in versions:
+        env['CM_GIT_CHECKOUT'] = need_version
+
+    return {'return':0}
+
+
+def postprocess(i):
+
+    env = i['env']
+    state = i['state']
+
+    env['CM_MLPERF_TRAINING_SOURCE'] = os.path.join(os.getcwd(), 'training')
+
+#        20221024: we save and restore env in the main script and can clean env here for determinism
+#    if '+PYTHONPATH' not in env: env['+PYTHONPATH'] = []
+    env['+PYTHONPATH']=[]
+
+    return {'return':0}
diff --git a/cm-mlops/script/get-mlperf-training-src/run.sh b/cm-mlops/script/get-mlperf-training-src/run.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+CUR_DIR=$PWD
+SCRIPT_DIR=${CM_TMP_CURRENT_SCRIPT_PATH}
+
+echo "******************************************************"
+echo "Cloning Mlcommons from ${CM_GIT_URL} with branch ${CM_GIT_CHECKOUT} ${CM_GIT_DEPTH} ${CM_GIT_RECURSE_SUBMODULES}..."
+
+if [ ! -d "training" ]; then
+  if [ -z ${CM_GIT_SHA} ]; then
+    git clone ${CM_GIT_RECURSE_SUBMODULES} -b "${CM_GIT_CHECKOUT}" ${CM_GIT_URL} ${CM_GIT_DEPTH} training
+    cd training
+  else
+    git clone ${CM_GIT_RECURSE_SUBMODULES} ${CM_GIT_URL} ${CM_GIT_DEPTH} training
+    cd training
+    git checkout -b "${CM_GIT_CHECKOUT}"
+  fi
+  if [ "${?}" != "0" ]; then exit 1; fi
+fi
+
+if [ ${CM_GIT_PATCH} == "yes" ]; then
+  patch_filename=${CM_GIT_PATCH_FILENAME:-git.patch}
+  echo "Applying patch ${SCRIPT_DIR}/patch/$patch_filename"
+  git apply ${SCRIPT_DIR}/patch/"$patch_filename"
+  if [ "${?}" != "0" ]; then exit 1; fi
+fi
+cd "$CUR_DIR"
diff --git a/cm-mlops/script/install-cuda-prebuilt/customize.py b/cm-mlops/script/install-cuda-prebuilt/customize.py
@@ -8,6 +8,9 @@ def preprocess(i):
     env = i['env']
 
     automation = i['automation']
+    version = env.get('CM_VERSION')
+    if version not in env.get('CM_CUDA_LINUX_FILENAME', ''):
+        return {'return': 1, 'error': "Only CUDA versions 11.7.0 and 11.8.0 are supported now!"}
 
     recursion_spaces = i['recursion_spaces']
     nvcc_bin = "nvcc"