diff --git a/Dockerfile b/Dockerfile
index 531b4081d9..0a996ff72a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,6 +14,9 @@ ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0"
 ADD https://raw.githubusercontent.com/mosaicml/llm-foundry/$BRANCH_NAME/setup.py setup.py
 RUN rm setup.py
 
+# Install TransformerEngine
+RUN NVTE_FRAMEWORK=pytorch CMAKE_BUILD_PARALLEL_LEVEL=4 MAX_JOBS=4 pip install git+https://github.com/NVIDIA/TransformerEngine.git@901e5d2
+
 # Install and uninstall foundry to cache foundry requirements
 RUN git clone -b $BRANCH_NAME https://github.com/mosaicml/llm-foundry.git
 RUN pip install --no-cache-dir "./llm-foundry${DEP_GROUPS}"
diff --git a/setup.py b/setup.py
index 0eab2343b8..8db58afcb9 100644
--- a/setup.py
+++ b/setup.py
@@ -122,18 +122,14 @@
     'grouped-gemm==0.1.4', #### TODO: UPDATE TO 0.1.6
 ]
 
-extra_deps['te'] = [
-    'transformer-engine[pytorch]==1.9.0.post1',
-]
-
 extra_deps['databricks-serverless'] = {
     dep for key, deps in extra_deps.items() for dep in deps
-    if 'gpu' not in key and 'megablocks' not in key and 'te' not in key and
+    if 'gpu' not in key and 'megablocks' not in key and
     'databricks-connect' not in dep
 }
 extra_deps['all-cpu'] = {
     dep for key, deps in extra_deps.items() for dep in deps
-    if 'gpu' not in key and 'megablocks' not in key and 'te' not in key
+    if 'gpu' not in key and 'megablocks' not in key
 }
 extra_deps['all'] = {
     dep for key, deps in extra_deps.items() for dep in deps