Skip to content

Commit

Permalink
Merge from OctoML (#515)
Browse files Browse the repository at this point in the history
  • Loading branch information
gfursin authored Nov 15, 2022
2 parents 692baaf + 9c06b30 commit fe95527
Show file tree
Hide file tree
Showing 21 changed files with 381 additions and 19 deletions.
14 changes: 13 additions & 1 deletion cm-mlops/automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,13 @@ def run(self, i):
(input) (str): converted to env.CM_INPUT (local env)
(output) (str): converted to env.CM_OUTPUT (local env)
(name) (str): converted to env.CM_NAME (local env)
(extra_cache_tags) (str): converted to env.CM_EXTRA_CACHE_TAGS and used to add to caching (local env)
(name) (str): taken from env.CM_NAME and/or converted to env.CM_NAME (local env)
Added to extra_cache_tags with "name-" prefix.
Useful for python virtual env (to create multiple entries)
(quiet) (bool): if True, set env.CM_QUIET to "yes" and attempt to skip questions
(the developers have to support it in pre/post processing and scripts)
Expand Down Expand Up @@ -285,6 +288,9 @@ def run(self, i):
if x!='' and x not in extra_cache_tags:
extra_cache_tags.append(x)

if env.get('CM_NAME','')!='':
extra_cache_tags.append('name-'+env['CM_NAME'].strip().lower())


############################################################################################################
# Check if we want to skip cache (either by skip_cache or by fake_run)
Expand Down Expand Up @@ -1338,6 +1344,12 @@ def run(self, i):


# Restore original env/state and merge env/state
# This is needed since we want to keep original env/state outside this script
# If we delete env and create a new dict, the original one outside this script will be detached
# That's why we just clean all keys in original env/state (used outside)
# And then copy saved_env (with new_env merged) and saved_state (with new_state merged)
# while getting rid of all temporal updates in env and state inside this script

for k in list(env.keys()):
del(env[k])
for k in list(state.keys()):
Expand Down
7 changes: 7 additions & 0 deletions cm-mlops/script/activate-python-venv/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# About

Activate python virtual environment installed via CM:

```bash
cm run script "activate python-venv" (--version={python version}) (--name={user friendly name of the virtual environment})
```
1 change: 1 addition & 0 deletions cm-mlops/script/activate-python-venv/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"names": [
"python-venv"
],
"reuse_version": true,
"tags": "install,python-venv"
}
],
Expand Down
6 changes: 3 additions & 3 deletions cm-mlops/script/activate-python-venv/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ def preprocess(i):

name = env.get('CM_NAME','')
if name != '':
name_tag = name.lower()
name = name.strip().lower()

r = automation.update_deps({'deps':meta['post_deps'],
r = automation.update_deps({'deps':meta['prehook_deps'],
'update_deps':{
'python-venv':{
'extra_cache_tags':name
'name':name
}
}
})
Expand Down
20 changes: 19 additions & 1 deletion cm-mlops/script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ deps:
- tvm-onnx
- tvm-pytorch
skip_if_env:
CM_TVM_PIP_INSTALL: [ "on" ]
CM_TVM_PIP_INSTALL:
- "on"


########################################################################
Expand Down Expand Up @@ -317,6 +318,23 @@ variations:
tags: _float32
env:
CM_MLPERF_PYTHON: 'yes'
CM_MLPERF_IMPLEMENTATION: reference

nvidia:
add_deps_recursive:
imagenet-accuracy-script:
tags: _float32
env:
CM_MLPERF_IMPLEMENTATION: nvidia
deps:
## Nvidia common code
- tags: get,mlperf,inference,nvidia,common-code
- tags: get,mlperf,training,src
- tags: get,generic-python-lib,_nvidia-pyindex
- tags: get,generic-python-lib,_nvidia-tensorrt
- tags: get,generic-python-lib,_numpy
- tags: get,generic-python-lib,_pycuda
- tags: get,generic-python-lib,_mlperf_logging

# ML engine
onnxruntime:
Expand Down
4 changes: 3 additions & 1 deletion cm-mlops/script/app-mlperf-inference/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ def get_run_cmd(env, scenario_extra_options, mode_extra_options, dataset_options
return ""

def get_run_cmd_nvidia(env, scenario_extra_options, mode_extra_options, dataset_options):
    """Build the command line that launches the bundled NVIDIA RetinaNet script.

    The script is looked up as ``nvidia/retinanet.py`` next to this file and
    is run with the Python binary named by ``env['CM_PYTHON_BIN_WITH_PATH']``.

    Args:
        env (dict): must contain ``CM_PYTHON_BIN_WITH_PATH``,
            ``CM_MLPERF_TRAINING_SOURCE`` and ``CM_ML_MODEL_FILE_WITH_PATH``.
        scenario_extra_options: not used by this function.
        mode_extra_options: not used by this function.
        dataset_options: not used by this function.

    Returns:
        str: the full command, with arguments separated by single spaces.
    """
    import pathlib

    script_dir = pathlib.Path(__file__).parent.resolve()
    retinanet_script = os.path.join(script_dir, "nvidia", "retinanet.py")

    # Sample count and batch size are fixed values, not taken from env.
    parts = [
        env['CM_PYTHON_BIN_WITH_PATH'],
        retinanet_script,
        "--pytorch",
        "--num_samples=1200",
        "--batch_size=8",
        "--training_repo_path=" + env['CM_MLPERF_TRAINING_SOURCE'],
        "--pyt_ckpt_path=" + env['CM_ML_MODEL_FILE_WITH_PATH'],
    ]
    return " ".join(parts)

def get_run_cmd_reference(env, scenario_extra_options, mode_extra_options, dataset_options):
Expand Down
32 changes: 32 additions & 0 deletions cm-mlops/script/get-generic-python-lib/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,38 @@
"CM_BOTO3_VERSION"
]
},
"nvidia-pyindex": {
"env": {
"CM_PYTHON_PACKAGE_NAME": "nvidia-pyindex"
},
"new_env_keys": [
"CM_NVIDIA_PYINDEX_VERSION"
]
},
"nvidia-tensorrt": {
"env": {
"CM_PYTHON_PACKAGE_NAME": "nvidia-tensorrt"
},
"new_env_keys": [
"CM_NVIDIA_TENSORRT_VERSION"
]
},
"pycuda": {
"env": {
"CM_PYTHON_PACKAGE_NAME": "pycuda"
},
"new_env_keys": [
"CM_PYCUDA_VERSION"
]
},
"mlperf_logging": {
"env": {
"CM_PYTHON_PACKAGE_NAME": "mlperf_logging"
},
"new_env_keys": [
"CM_MLPERF_LOGGING_VERSION"
]
},
"wandb": {
"env": {
"CM_PYTHON_PACKAGE_NAME": "wandb"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ def preprocess(i):
def postprocess(i):
    """Expose the NVIDIA common code directory through ``+PYTHONPATH``.

    Args:
        i (dict): must contain ``env``, a dict holding
            ``CM_MLPERF_INFERENCE_RESULTS_PATH``.

    Returns:
        dict: ``{'return': 0}``.
    """
    env = i['env']

    common_code_dir = os.path.join(
        env['CM_MLPERF_INFERENCE_RESULTS_PATH'],
        "closed", "NVIDIA", "code", "common")

    # Stored as a one-element list, replacing any previous value.
    env['+PYTHONPATH'] = [common_code_dir]

    return {'return': 0}
2 changes: 1 addition & 1 deletion cm-mlops/script/get-mlperf-inference-src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ where [VARIATION] is one of
* `octoml:` Works with the OctoML fork of the MLCommons inference repository. Uses `short-history` variation
* `short-history:` Uses a git depth of last 10 commits (significantly reduces the download size)
* `full-history:` Uses the full git history
* `recurse-submodules:` Downloads all the submodules
* `no-recurse-submodules:` Only download the main repository

[VERSION] is one of
* `master:` Uses the master branch
Expand Down
27 changes: 27 additions & 0 deletions cm-mlops/script/get-mlperf-training-src/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Get MLCommons Training Source
This [CM script](https://github.com/mlcommons/ck/blob/master/cm/docs/tutorial-scripts.md) git clones the [MLCommons Training repository](https://github.com/mlcommons/training).

## Commands
To install
```
cm run script --tags=get,mlperf,training,src,[VARIATION] --version=[VERSION]
```
where [VARIATION] is one of
* `default:` Works with the official MLCommons training repository. Uses `short-history` variation
* `patch:` Applies the `git.patch` to the cloned git repository
* `octoml:` Works with the OctoML fork of the MLCommons inference repository. Uses `short-history` variation
* `short-history:` Uses a git depth of last 5 commits (significantly reduces the download size)
* `full-history:` Uses the full git history
* `no-recurse-submodules:` Only download the main repository

[VERSION] is one of
* `master:` Uses the master branch
* `r2.1:` Uses the release branch used for MLCommons training 2.1 round

## Exported Variables
* `CM_MLPERF_TRAINING_SOURCE`: Directory path of the cloned training repository
* `PYTHONPATH`: Is appended with the paths to vision module and the submission tools module

## Supported and Tested OS
1. Ubuntu 18.04, 20.04, 22.04
2. RHEL 9
93 changes: 93 additions & 0 deletions cm-mlops/script/get-mlperf-training-src/_cm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"alias": "get-mlperf-training-src",
"automation_alias": "script",
"automation_uid": "5b4e0237da074764",
"cache": true,
"category": "Modular MLPerf benchmarks",
"default_env": {
"CM_GIT_CHECKOUT": "master",
"CM_GIT_DEPTH": "--depth 4",
"CM_GIT_PATCH": "no",
"CM_GIT_RECURSE_SUBMODULES": " --recurse-submodules",
"CM_GIT_URL": "https://github.com/mlcommons/training.git"
},
"default_variation": "default",
"default_version": "master",
"deps": [
{
"tags": "detect,os"
},
{
"names": [
"python",
"python3"
],
"tags": "get,python3"
}
],
"new_env_keys": [
"CM_MLPERF_TRAINING_*",
"CM_MLPERF_TRAINING_LAST_RELEASE",
"+PYTHONPATH"
],
"tags": [
"get",
"src",
"source",
"training",
"training-src",
"training-source",
"mlperf",
"mlcommons"
],
"uid": "dc440bd88e794a28",
"variations": {
"default": {
"base": [
"short-history"
],
"env": {
"CM_GIT_PATCH": "no"
}
},
"full-history": {
"env": {
"CM_GIT_DEPTH": ""
}
},
"no-recurse-submodules": {
"env": {
"CM_GIT_RECURSE_SUBMODULES": ""
}
},
"patch": {
"env": {
"CM_GIT_PATCH": "yes"
}
},
"short-history": {
"env": {
"CM_GIT_DEPTH": "--depth 5"
}
}
},
"versions": {
"custom": {
"env": {
"CM_MLPERF_LAST_RELEASE": "v2.1"
}
},
"master": {
"env": {
"CM_GIT_CHECKOUT": "master",
"CM_MLPERF_LAST_RELEASE": "v2.1"
}
},
"r2.1": {
"env": {
"CM_GIT_CHECKOUT": "r2.1",
"CM_MLPERF_LAST_RELEASE": "v2.1"
}
}
}
}
41 changes: 41 additions & 0 deletions cm-mlops/script/get-mlperf-training-src/customize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from cmind import utils
import os
import shutil

def preprocess(i):
    """Prepare the git-related environment before the clone script runs.

    Args:
        i (dict): must contain ``os_info`` (with a ``platform`` key),
            ``env`` (dict, updated in place) and ``meta`` (with a
            ``versions`` mapping).

    Returns:
        dict: ``{'return': 0}`` on success, or ``{'return': 1, 'error': ...}``
        when the platform is Windows.
    """
    if i['os_info']['platform'] == 'windows':
        return {'return': 1, 'error': 'Windows is not supported in this script yet'}

    env = i['env']
    meta = i['meta']

    # Make sure both optional git switches exist, defaulting to empty.
    env.setdefault('CM_GIT_DEPTH', '')
    env.setdefault('CM_GIT_RECURSE_SUBMODULES', '')

    requested_version = env.get('CM_VERSION', '')
    known_versions = meta['versions']

    # A requested version that is not listed in the script meta is used
    # directly as the git checkout target.
    if requested_version != '' and requested_version not in known_versions:
        env['CM_GIT_CHECKOUT'] = requested_version

    return {'return': 0}


def postprocess(i):
    """Record where the training repository lives and reset ``+PYTHONPATH``.

    Args:
        i (dict): must contain ``env`` (dict, updated in place) and ``state``.

    Returns:
        dict: ``{'return': 0}``.
    """
    env = i['env']
    state = i['state']  # looked up as before, but not used in this function

    # The repository is expected in the 'training' sub-directory of the
    # current working directory.
    training_dir = os.path.join(os.getcwd(), 'training')
    env['CM_MLPERF_TRAINING_SOURCE'] = training_dir

    # 20221024: env is saved and restored by the main script, so the list is
    # reset unconditionally here for determinism.
    env['+PYTHONPATH'] = []

    return {'return': 0}
27 changes: 27 additions & 0 deletions cm-mlops/script/get-mlperf-training-src/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash

# Clone the MLCommons training repository into ./training and optionally
# apply a patch from this script's patch/ directory.
#
# Environment variables read:
#   CM_TMP_CURRENT_SCRIPT_PATH  directory of this script (holds patch/)
#   CM_GIT_URL                  repository URL
#   CM_GIT_CHECKOUT             branch name passed to git
#   CM_GIT_DEPTH                optional clone depth flags (may be empty)
#   CM_GIT_RECURSE_SUBMODULES   optional submodule flag (may be empty)
#   CM_GIT_SHA                  when set, clone first and check out afterwards
#   CM_GIT_PATCH                "yes" to apply a patch
#   CM_GIT_PATCH_FILENAME       patch file name (defaults to git.patch)

CUR_DIR=$PWD
SCRIPT_DIR=${CM_TMP_CURRENT_SCRIPT_PATH}

echo "******************************************************"
echo "Cloning Mlcommons from ${CM_GIT_URL} with branch ${CM_GIT_CHECKOUT} ${CM_GIT_DEPTH} ${CM_GIT_RECURSE_SUBMODULES}..."

if [ ! -d "training" ]; then
  # CM_GIT_RECURSE_SUBMODULES and CM_GIT_DEPTH stay unquoted on purpose:
  # they expand to zero or more separate git arguments.
  if [ -z "${CM_GIT_SHA}" ]; then
    git clone ${CM_GIT_RECURSE_SUBMODULES} -b "${CM_GIT_CHECKOUT}" "${CM_GIT_URL}" ${CM_GIT_DEPTH} training || exit 1
  else
    git clone ${CM_GIT_RECURSE_SUBMODULES} "${CM_GIT_URL}" ${CM_GIT_DEPTH} training || exit 1
    # Fail right away if the clone directory is missing, so the checkout
    # can never run in the caller's directory.
    cd training || exit 1
    git checkout -b "${CM_GIT_CHECKOUT}" || exit 1
    cd "$CUR_DIR" || exit 1
  fi
fi

if [ "${CM_GIT_PATCH}" == "yes" ]; then
  # Always enter the repository before patching, including when it already
  # existed and the clone step above was skipped.
  cd training || exit 1
  patch_filename=${CM_GIT_PATCH_FILENAME:-git.patch}
  echo "Applying patch ${SCRIPT_DIR}/patch/$patch_filename"
  git apply "${SCRIPT_DIR}/patch/$patch_filename"
  if [ "${?}" != "0" ]; then exit 1; fi
fi
cd "$CUR_DIR"
3 changes: 3 additions & 0 deletions cm-mlops/script/install-cuda-prebuilt/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ def preprocess(i):
env = i['env']

automation = i['automation']
version = env.get('CM_VERSION')
if version not in env.get('CM_CUDA_LINUX_FILENAME', ''):
return {'return': 1, 'error': "Only CUDA versions 11.7.0 and 11.8.0 are supported now!"}

recursion_spaces = i['recursion_spaces']
nvcc_bin = "nvcc"
Expand Down
Loading

0 comments on commit fe95527

Please sign in to comment.