diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 6f1be030d7..718a19203f 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -77,8 +77,10 @@ jobs: TAG_NAME=$(echo "${BRANCH_NAME}" | sed 's/\//_/g') echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV - echo "DOCKER_TAG=mosaicml/llm-foundry:release_${TAG_NAME}" >> $GITHUB_ENV - echo "AWS_DOCKER_TAG=mosaicml/llm-foundry:release_${TAG_NAME}_aws" >> $GITHUB_ENV + echo "DOCKER_TAG=mosaicml/llm-foundry:test_release_${TAG_NAME}" >> $GITHUB_ENV + echo "AWS_DOCKER_TAG=mosaicml/llm-foundry:test_release_${TAG_NAME}_aws" >> $GITHUB_ENV + # echo "LATEST_TAG=mosaicml/llm-foundry:release-latest" >> $GITHUB_ENV + # echo "AWS_LATEST_TAG=mosaicml/llm-foundry:release_aws-latest" >> $GITHUB_ENV - name: Build and push AWS Docker image @@ -89,6 +91,7 @@ jobs: push: true tags: | ${{ env.AWS_DOCKER_TAG }} + ${{ env.AWS_LATEST_TAG }} build-args: | BASE_IMAGE=mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws BRANCH_NAME=${{ env.BRANCH_NAME }} @@ -104,6 +107,7 @@ jobs: push: true tags: | ${{ env.DOCKER_TAG }} + ${{ env.LATEST_TAG }} build-args: | BASE_IMAGE=mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04 BRANCH_NAME=${{ env.BRANCH_NAME }} diff --git a/Dockerfile b/Dockerfile index 06ce6d5293..5e8c43d3f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,46 +1,30 @@ +# Copyright 2022 MosaicML LLM Foundry authors +# SPDX-License-Identifier: Apache-2.0 + ARG BASE_IMAGE FROM $BASE_IMAGE ARG BRANCH_NAME ARG DEP_GROUPS ARG TE_COMMIT -ARG KEEP_FOUNDRY="false" +ARG KEEP_FOUNDRY=false ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0" -RUN echo "Starting build with BRANCH_NAME=${BRANCH_NAME}, DEP_GROUPS=${DEP_GROUPS}, TE_COMMIT=${TE_COMMIT}, KEEP_FOUNDRY=${KEEP_FOUNDRY}" - # Check for changes in setup.py. +# If there are changes, the docker cache is invalidated and a fresh pip installation is triggered. ADD https://raw.githubusercontent.com/mosaicml/llm-foundry/$BRANCH_NAME/setup.py setup.py RUN rm setup.py # Install TransformerEngine RUN NVTE_FRAMEWORK=pytorch CMAKE_BUILD_PARALLEL_LEVEL=4 MAX_JOBS=4 pip install git+https://github.com/NVIDIA/TransformerEngine.git@$TE_COMMIT -RUN echo "Cloning llm-foundry repository" +# Install and uninstall foundry to cache foundry requirements RUN git clone -b $BRANCH_NAME https://github.com/mosaicml/llm-foundry.git - -RUN echo "Installing llm-foundry" RUN pip install --no-cache-dir "./llm-foundry${DEP_GROUPS}" -RUN echo "KEEP_FOUNDRY value: ${KEEP_FOUNDRY}" - # Conditionally uninstall llm-foundry and remove its directory RUN if [ "$KEEP_FOUNDRY" != "true" ]; then \ - echo "Uninstalling llm-foundry"; \ pip uninstall -y llm-foundry && \ rm -rf llm-foundry; \ - else \ - echo "Keeping llm-foundry installed"; \ fi - -# Add a final check to see if llm-foundry is installed -RUN pip list | grep llm-foundry || echo "llm-foundry not found in pip list" - -# Print the contents of the directory where llm-foundry was cloned -RUN ls -la /llm-foundry || echo "llm-foundry directory not found" - -# Try to import llm-foundry in Python -RUN python -c "import llm_foundry; print('llm-foundry successfully imported')" || echo "Failed to import llm-foundry" - -RUN echo "Build process completed"