diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index c09f9bb7a5..6cc799acde 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -50,3 +50,64 @@ jobs:
           user: __token__
           password: ${{ secrets.TEST_PYPI_API_TOKEN }}
           repository_url: https://test.pypi.org/legacy/
+
+  build-docker:
+    # NOTE(review): the runner label is an assumption; confirm it.
+    runs-on: ubuntu-latest
+    steps:
+      # `context: .` builds from the workspace, so check the repo out first.
+      - name: Checkout source code
+        uses: actions/checkout@v3
+
+      # `push: true` needs an authenticated registry session.
+      # NOTE(review): secret names are assumed; confirm them in repo settings.
+      - name: Login to DockerHub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKER_HUB_USERNAME }}
+          password: ${{ secrets.DOCKER_HUB_PASSWORD }}
+
+      # The ref name is passed via `env` rather than interpolated into the
+      # script, so its contents are never parsed as shell syntax.
+      - name: Define Docker tags
+        id: define-tags
+        env:
+          REF_NAME: ${{ github.ref_name }}
+        run: |
+          # Replace / with _ in the ref name
+          BRANCH_NAME="${REF_NAME}"
+          TAG_NAME=$(echo "${BRANCH_NAME}" | sed 's/\//_/g')
+
+          echo "BRANCH_NAME=${BRANCH_NAME}" >> "$GITHUB_ENV"
+          echo "DOCKER_TAG=mosaicml/llm-foundry:release_${TAG_NAME}" >> "$GITHUB_ENV"
+          echo "AWS_DOCKER_TAG=mosaicml/llm-foundry:release_${TAG_NAME}_aws" >> "$GITHUB_ENV"
+          echo "LATEST_TAG=mosaicml/llm-foundry:release-latest" >> "$GITHUB_ENV"
+          echo "AWS_LATEST_TAG=mosaicml/llm-foundry:release_aws-latest" >> "$GITHUB_ENV"
+
+      - name: Build and push AWS Docker image
+        uses: docker/build-push-action@v3
+        with:
+          context: .
+          file: release.Dockerfile
+          push: true
+          tags: |
+            ${{ env.AWS_DOCKER_TAG }}
+            ${{ env.AWS_LATEST_TAG }}
+          build-args: |
+            BASE_IMAGE=mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
+            BRANCH_NAME=${{ env.BRANCH_NAME }}
+            TE_COMMIT=901e5d2
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v3
+        with:
+          context: .
+          file: release.Dockerfile
+          push: true
+          tags: |
+            ${{ env.DOCKER_TAG }}
+            ${{ env.LATEST_TAG }}
+          build-args: |
+            BASE_IMAGE=mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04
+            BRANCH_NAME=${{ env.BRANCH_NAME }}
+            TE_COMMIT=901e5d2
diff --git a/release.Dockerfile b/release.Dockerfile
new file mode 100644
index 0000000000..b759455224
--- /dev/null
+++ b/release.Dockerfile
@@ -0,0 +1,23 @@
+# Copyright 2022 MosaicML LLM Foundry authors
+# SPDX-License-Identifier: Apache-2.0
+
+ARG BASE_IMAGE
+FROM $BASE_IMAGE
+
+ARG BRANCH_NAME
+ARG TE_COMMIT
+
+ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0"
+
+# Check for changes in setup.py.
+# If there are changes, the docker cache is invalidated and a fresh pip installation is triggered.
+ADD https://raw.githubusercontent.com/mosaicml/llm-foundry/$BRANCH_NAME/setup.py setup.py
+RUN rm setup.py
+
+# Install TransformerEngine
+RUN NVTE_FRAMEWORK=pytorch CMAKE_BUILD_PARALLEL_LEVEL=4 MAX_JOBS=4 pip install git+https://github.com/NVIDIA/TransformerEngine.git@$TE_COMMIT
+
+# Install and uninstall foundry to cache foundry requirements
+RUN git clone -b $BRANCH_NAME https://github.com/mosaicml/llm-foundry.git
+RUN pip install --no-cache-dir "./llm-foundry[all]"
+RUN pip uninstall -y llm-foundry