Patch notes (free-form preamble; patch tools skip text before the first "diff --git" line).

1. .github/workflows/docker.yaml: the two build-matrix entries move from the
   2.1.0_cu121 / python3.10 base images to the 2.3.0_cu121 / python3.11 ones
   (plain and -aws variants); dep_groups stays "[gpu-flash2]".
2. Dockerfile: the clone-and-build of TransformerEngine (branch "stable") is
   replaced by a pip install straight from the git repository, followed by a
   pip uninstall of transformer-engine, next to the existing install/uninstall
   of llm-foundry (presumably for the same requirement-caching reason stated
   in the Dockerfile comment; confirm with the author).

NOTE(review): the new install targets "@main", a moving ref, where the removed
clone used "--branch stable"; consider pinning a tag or commit so image builds
are reproducible.
NOTE(review): indentation of the YAML context lines was reconstructed from the
workflow structure; verify against the target file before applying.

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 3f25b155d6..6ca10fcd47 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -17,11 +17,11 @@ jobs:
     strategy:
       matrix:
         include:
-        - name: "2.1.0_cu121_flash2"
-          base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
+        - name: "2.3.0_cu121_flash2"
+          base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
           dep_groups: "[gpu-flash2]"
-        - name: "2.1.0_cu121_flash2_aws"
-          base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04-aws
+        - name: "2.3.0_cu121_flash2_aws"
+          base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04-aws
           dep_groups: "[gpu-flash2]"
     steps:
     - name: Maximize Build Space on Worker
diff --git a/Dockerfile b/Dockerfile
index 0fff8d35a8..be16614dfc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,9 +14,7 @@ RUN rm setup.py
 
 # Install and uninstall foundry to cache foundry requirements
 RUN pip install --verbose --no-cache-dir "./llm-foundry${DEP_GROUPS}"
+RUN pip install --verbose --no-cache-dir git+https://github.com/NVIDIA/TransformerEngine.git@main
 RUN pip uninstall -y llm-foundry
+RUN pip uninstall -y transformer-engine
 RUN rm -rf llm-foundry
-RUN git clone --branch stable --recursive https://github.com/NVIDIA/TransformerEngine.git
-RUN cd TransformerEngine && git submodule update --init --recursive
-RUN export NVTE_FRAMEWORK=pytorch && MAX_JOBS=1 pip install .
-RUN cd ..