From aeae01f2cdfe5e88916b5ee8d098c0d7802c1f8e Mon Sep 17 00:00:00 2001
From: Dustin Franklin
Date: Sat, 13 Apr 2024 00:02:10 -0400
Subject: [PATCH] fixed flash-attention build for JP6

---
 packages/llm/flash-attention/Dockerfile |  2 +-
 packages/llm/flash-attention/build.sh   | 10 ++++++----
 packages/llm/flash-attention/install.sh |  2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/packages/llm/flash-attention/Dockerfile b/packages/llm/flash-attention/Dockerfile
index ff7d4b8af..e26ac747e 100644
--- a/packages/llm/flash-attention/Dockerfile
+++ b/packages/llm/flash-attention/Dockerfile
@@ -10,7 +10,7 @@ FROM ${BASE_IMAGE}
 
 ARG FLASH_ATTENTION_VERSION="2.5.6"
 
-COPY build.sh install.sh patch.diff /tmp/flash-attention
+COPY build.sh install.sh patch.diff /tmp/flash-attention/
 
 RUN /tmp/flash-attention/install.sh || /tmp/flash-attention/build.sh
\ No newline at end of file
diff --git a/packages/llm/flash-attention/build.sh b/packages/llm/flash-attention/build.sh
index fedabf3d3..b35d8222f 100755
--- a/packages/llm/flash-attention/build.sh
+++ b/packages/llm/flash-attention/build.sh
@@ -10,15 +10,17 @@ cd /opt/flash-attention
 git apply /tmp/flash-attention/patch.diff
 git diff
 git status
- 
+
 FLASH_ATTENTION_FORCE_BUILD=1 \
 FLASH_ATTENTION_FORCE_CXX11_ABI=0 \
 FLASH_ATTENTION_SKIP_CUDA_BUILD=0 \
+MAX_JOBS=$(nproc) \
 python3 setup.py --verbose bdist_wheel --dist-dir /opt
 
+ls /opt
 cd /
 
-pip3 install --no-cache-dir --verbose /opt/flash-attn*.whl
-pip3 show flash-attn && python3 -c 'import flash_attn'
+pip3 install --no-cache-dir --verbose /opt/flash_attn*.whl
+#pip3 show flash-attn && python3 -c 'import flash_attn'
 
-twine upload --verbose /opt/flash-attn*.whl || echo "failed to upload wheel to ${TWINE_REPOSITORY_URL}"
+twine upload --verbose /opt/flash_attn*.whl || echo "failed to upload wheel to ${TWINE_REPOSITORY_URL}"
diff --git a/packages/llm/flash-attention/install.sh b/packages/llm/flash-attention/install.sh
index a1e77ff11..1f771df19 100755
--- a/packages/llm/flash-attention/install.sh
+++ b/packages/llm/flash-attention/install.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 set -ex
 
-pip3 install --no-cache-dir --verbose flash-attn=${FLASH_ATTENTION_VERSION}
+pip3 install --no-cache-dir --verbose flash-attn==${FLASH_ATTENTION_VERSION}
 pip3 show flash-attn && python3 -c 'import flash_attn'
\ No newline at end of file