# ane test #82
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: apple-perf

# Triggers: a nightly schedule, changes to this workflow or to the Device Farm
# test-spec template, manual dispatch, and invocation from other workflows.
on:
  # Every day at 01:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 1 * * *'

  pull_request:
    paths:
      - '.github/workflows/apple-perf.yml'
      - 'extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2'

  push:
    branches:
      - 'main'
    paths:
      - '.github/workflows/apple-perf.yml'
      - 'extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2'

  # Note: GitHub has an upper limit of 10 inputs
  workflow_dispatch:
    inputs:
      models:
        description: 'Models to be benchmarked'
        required: false
        type: string
        default: 'stories110M'
      devices:
        description: 'Target devices to run benchmark'
        required: false
        type: string
        default: 'apple_iphone_15'
      benchmark_configs:
        description: 'The list of configs used the benchmark'
        required: false
        type: string

  # Same inputs as workflow_dispatch, for callers that reuse this workflow.
  workflow_call:
    inputs:
      models:
        description: 'Models to be benchmarked'
        required: false
        type: string
        default: 'stories110M'
      devices:
        description: 'Target devices to run benchmark'
        required: false
        type: string
        default: 'apple_iphone_15'
      benchmark_configs:
        description: 'The list of configs used the benchmark'
        required: false
        type: string
# Runs sharing a group key cancel each other. The key is built from the
# workflow name, the PR number (or the ref name outside of pull requests), the
# commit SHA when the ref is a branch (the literal `false` otherwise), and two
# flags telling manual-dispatch and scheduled runs apart from the rest.
concurrency:
  cancel-in-progress: true
  group: >-
    ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
jobs:
  # Resolves the model/device lists and exposes the benchmark matrix consumed by
  # every downstream job through the `benchmark_configs` job output.
  set-parameters:
    runs-on: ubuntu-22.04
    outputs:
      benchmark_configs: ${{ steps.set-parameters.outputs.benchmark_configs }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          # Separate default values from the workflow dispatch. To ensure defaults are accessible
          # during scheduled runs and to provide flexibility for different defaults between
          # on-demand and periodic benchmarking.
          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' || 'llama' }}
          CRON_DEFAULT_DEVICES: apple_iphone_15
          # User-supplied inputs are handed over through the environment rather than being
          # interpolated into the script text, so their content cannot be executed as shell code.
          REQUESTED_MODELS: ${{ inputs.models }}
          REQUESTED_DEVICES: ${{ inputs.devices }}
        run: |
          set -eux
          # Inputs are empty on schedule/push/pull_request events; fall back to the defaults then.
          MODELS="${REQUESTED_MODELS}"
          if [ -z "$MODELS" ]; then
            MODELS="$CRON_DEFAULT_MODELS"
          fi
          DEVICES="${REQUESTED_DEVICES}"
          if [ -z "$DEVICES" ]; then
            DEVICES="$CRON_DEFAULT_DEVICES"
          fi
          # NOTE(review): the script is presumably what publishes the `benchmark_configs`
          # step output (nothing else in this step does) -- confirm.
          PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py \
            --os "ios" \
            --models $MODELS \
            --devices $DEVICES
          # NOTE(review): expressions are substituted before this script starts, so a step
          # cannot read its own outputs here and the value printed below is always empty.
          echo "benchmark_configs is: ${{ steps.set-parameters.outputs.benchmark_configs }}"

  # Renders the Device Farm test spec for each matrix entry and uploads it to S3.
  prepare-test-specs:
    runs-on: linux.2xlarge
    needs: set-parameters
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
      fail-fast: false
    steps:
      - uses: actions/checkout@v3
      - name: Prepare the spec
        shell: bash
        working-directory: extension/benchmark/apple/Benchmark
        run: |
          set -eux
          echo "DEBUG: ${{ matrix.model }}"
          # The model will be exported in the next step to this S3 path
          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
          # We could write a script to properly use jinja here, but there is only one variable,
          # so let's just sed it
          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
          cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
          # Just print the test spec for debugging
          cat default-ios-device-farm-appium-test-spec.yml
      - name: Upload the spec
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
          retention-days: 1
          if-no-files-found: error
          path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml

  # Exports every model/config pair of the matrix on a macOS runner; the resulting
  # artifacts directory is uploaded to S3 by the reusable workflow.
  export-models:
    name: export-models
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs: set-parameters
    secrets: inherit
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
      fail-fast: false
    with:
      # NB: Need to use our AWS MacOS runner to upload large models to S3
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      timeout: 60
      upload-artifact: ios-models
      upload-artifact-to-s3: true
      secrets-env: EXECUTORCH_HF_TOKEN
      script: |
        set -eux

        echo "::group::Setting up CI environment"
        .ci/scripts/setup-conda.sh

        BUILD_TOOL=cmake
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        if [[ ${{ matrix.config }} == *"coreml"* ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            backends/apple/coreml/scripts/install_requirements.sh
        fi

        if [[ ${{ matrix.config }} == *"mps"* ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            backends/apple/mps/install_requirements.sh
        fi

        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh

        pip install -U "huggingface_hub[cli]"
        # Quoted so the token always reaches the CLI as a single argument
        huggingface-cli login --token "${SECRET_EXECUTORCH_HF_TOKEN}"
        ${CONDA_RUN} pip install accelerate sentencepiece
        pip list

        ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.config }}
        echo "::endgroup::"

        echo "::group::Exporting ${{ matrix.config }} model: ${{ matrix.model }}"
        BUILD_MODE="cmake"

        if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then
          # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
          HF_MODEL_REPO=${{ matrix.model }}
          OUT_ET_MODEL_NAME="$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g' | tr '[:upper:]' '[:lower:]')_${{ matrix.config }}"

          if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then
            # Llama models on Hugging Face
            if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then
              # SpinQuant
              # Download prequantized checkpoint from Hugging Face
              DOWNLOADED_PATH=$(
                bash .ci/scripts/download_hf_hub.sh \
                  --model_id "${HF_MODEL_REPO}" \
                  --files "tokenizer.model" "params.json" "consolidated.00.pth"
              )
              # Export using ExecuTorch's model definition
              ${CONDA_RUN} python -m examples.models.llama.export_llama \
                --model "llama3_2" \
                --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
                --params "${DOWNLOADED_PATH}/params.json" \
                --use_sdpa_with_kv_cache \
                -X \
                --xnnpack-extended-ops \
                --preq_mode 8da4w_output_8da8w \
                --preq_group_size 32 \
                --max_seq_length 2048 \
                --output_name "${OUT_ET_MODEL_NAME}.pte" \
                -kv \
                -d fp32 \
                --preq_embedding_quantize 8,0 \
                --use_spin_quant native \
                --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
              ls -lh "${OUT_ET_MODEL_NAME}.pte"
            elif [[ ${{ matrix.config }} == "llama3_qlora" ]]; then
              # QAT + LoRA
              # Download prequantized checkpoint from Hugging Face
              DOWNLOADED_PATH=$(
                bash .ci/scripts/download_hf_hub.sh \
                  --model_id "${HF_MODEL_REPO}" \
                  --files "tokenizer.model" "params.json" "consolidated.00.pth"
              )
              # Export using ExecuTorch's model definition
              ${CONDA_RUN} python -m examples.models.llama.export_llama \
                --model "llama3_2" \
                --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
                --params "${DOWNLOADED_PATH}/params.json" \
                -qat \
                -lora 16 \
                --preq_mode 8da4w_output_8da8w \
                --preq_group_size 32 \
                --preq_embedding_quantize 8,0 \
                --use_sdpa_with_kv_cache \
                -kv \
                -X \
                --xnnpack-extended-ops \
                -d fp32 \
                --max_seq_length 2048 \
                --output_name "${OUT_ET_MODEL_NAME}.pte" \
                --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
              ls -lh "${OUT_ET_MODEL_NAME}.pte"
            elif [[ ${{ matrix.config }} == "llama3_fb16" ]]; then
              # Original BF16 version, without any quantization
              DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
              ${CONDA_RUN} python -m examples.models.llama.export_llama \
                --model "llama3_2" \
                --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
                --params "${DOWNLOADED_PATH}/params.json" \
                -kv \
                --use_sdpa_with_kv_cache \
                -X \
                -d bf16 \
                --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
                --output_name="${OUT_ET_MODEL_NAME}.pte"
              ls -lh "${OUT_ET_MODEL_NAME}.pte"
            elif [[ ${{ matrix.config }} == "llama3_coreml_ane" ]]; then
              # ANE
              DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
              ${CONDA_RUN} python -m examples.models.llama.export_llama \
                --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
                --params "${DOWNLOADED_PATH}/params.json" \
                -E "4,32" \
                -kv \
                --disable_dynamic_shape \
                --coreml \
                --coreml-ios 18 \
                --coreml-quantize c4w \
                --coreml-compute-units cpu_and_ne \
                --output_name="${OUT_ET_MODEL_NAME}.pte"
              ls -lh "${OUT_ET_MODEL_NAME}.pte"
            else
              # By default, test with the Hugging Face model and the xnnpack recipe
              DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model")
              ${CONDA_RUN} python -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME"
              ls -lh "${OUT_ET_MODEL_NAME}.pte"
            fi
          else
            echo "Unsupported model ${{ matrix.model }}"
            exit 1
          fi

          # Bundle the exported program together with its tokenizer
          zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${DOWNLOADED_PATH}/tokenizer.model"
          ls -lh model.zip
          mkdir -p "${ARTIFACTS_DIR_NAME}"
          mv model.zip "${ARTIFACTS_DIR_NAME}"
        elif [[ ${{ matrix.model }} == "llama" ]]; then
          # Install requirements for export_llama
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash examples/models/llama/install_requirements.sh

          # Test llama2
          if [[ ${{ matrix.config }} == *"xnnpack"* ]]; then
            DELEGATE_CONFIG="xnnpack+custom+qe"
          elif [[ ${{ matrix.config }} == *"coreml"* ]]; then
            DELEGATE_CONFIG="coreml"
          elif [[ ${{ matrix.config }} == *"mps"* ]]; then
            DELEGATE_CONFIG="mps"
          else
            # Fail here with a readable message instead of an unbound-variable error below
            echo "Unsupported config ${{ matrix.config }} for model ${{ matrix.model }}"
            exit 1
          fi

          DTYPE="fp32"

          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash .ci/scripts/test_llama.sh \
            -model "stories110M" \
            -build_tool "${BUILD_MODE}" \
            -dtype "${DTYPE}" \
            -mode "${DELEGATE_CONFIG}" \
            -upload "${ARTIFACTS_DIR_NAME}"
        else
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash .ci/scripts/test_model.sh \
            "${{ matrix.model }}" \
            "${BUILD_MODE}" \
            "${{ matrix.config }}" \
            "${ARTIFACTS_DIR_NAME}"
        fi
        echo "::endgroup::"

  # Builds the ExecuTorch iOS frameworks and the benchmark app; the output is
  # published as the `ios-apps` artifact.
  build-benchmark-app:
    name: build-benchmark-app
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs:
      - set-parameters
    secrets: inherit
    with:
      runner: macos-latest-xlarge
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      upload-artifact: ios-apps
      secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
      timeout: 90
      script: |
        set -eux

        echo "::group::Setting up CI environment"
        .ci/scripts/setup-conda.sh

        BUILD_TOOL=cmake
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

        # Setup Apple certificate for iOS development
        BUILD_PROVISION_PROFILE_BASE64="${SECRET_EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64}" \
        BUILD_CERTIFICATE_BASE64="${SECRET_BUILD_CERTIFICATE_BASE64}" \
        KEYCHAIN_PASSWORD="${SECRET_KEYCHAIN_PASSWORD}" \
        .ci/scripts/setup-ios.sh

        # Install CoreML Backend Requirements
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          backends/apple/coreml/scripts/install_requirements.sh

        # Install MPS Backend Requirements
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          backends/apple/mps/install_requirements.sh
        echo "::endgroup::"

        echo "::group::Build ExecuTorch iOS frameworks"
        FRAMEWORKS=(
          "executorch"
          "backend_coreml"
          "backend_mps"
          "backend_xnnpack"
          "kernels_custom"
          "kernels_optimized"
          "kernels_portable"
          "kernels_quantized"
        )

        # Build Release iOS Frameworks
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack

        mkdir -p extension/benchmark/apple/Benchmark/Frameworks
        for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
          cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/benchmark/apple/Benchmark/Frameworks/
        ) done
        echo "::endgroup::"

        # NB: Although exported models can be copied to this directory and bundled together with the
        # app, we don't use this in CI and rely on AWS extra data parameter to make the model and the
        # tokenizer available to the benchmark. This decouples the app and the model. We just need to
        # create the directory here to pass the build
        mkdir -p extension/benchmark/apple/Benchmark/Models
        ${CONDA_RUN} --no-capture-output \
          build/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME}

  # Copies the built apps from the GitHub artifact store to S3.
  upload-benchmark-app:
    needs: build-benchmark-app
    runs-on: linux.2xlarge
    steps:
      - name: Download the apps from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: ios-apps
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the apps
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the apps to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifacts
          retention-days: 14
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/

  # Runs the benchmark for each matrix entry through the reusable mobile job,
  # pointing it at the app, xctestrun bundle and test spec uploaded to S3 above.
  benchmark-on-device:
    # Runs regardless of the outcome of the jobs listed under `needs`
    if: always()
    needs:
      - set-parameters
      - prepare-test-specs
      - upload-benchmark-app
      - export-models
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
      fail-fast: false
    with:
      # Due to scheduling a job may be pushed beyond the default 60m threshold
      timeout: 120
      device-type: ios
      # For iOS testing, the runner just needs to call AWS Device Farm, so there is no need to run this on macOS
      runner: linux.2xlarge
      test-infra-ref: ''
      # This is the ARN of ExecuTorch project on AWS
      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
      device-pool-arn: ${{ matrix.device_arn }}
      # Uploaded to S3 from the previous job
      ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa
      ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
      test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/default-ios-device-farm-appium-test-spec.yml

  # Syncs the Device Farm artifacts of this run from S3, extracts the benchmark
  # results from them and uploads the results in both the v2 and v3 schemas.
  upload-benchmark-results:
    needs:
      - benchmark-on-device
    # Runs even when the benchmark job did not succeed
    if: always()
    runs-on: linux.2xlarge
    environment: upload-benchmark-results
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'

      - name: Authenticate with AWS
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Setup conda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          python-version: '3.10'

      - name: Download the list of artifacts from S3
        env:
          ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
        shell: bash
        run: |
          set -eux
          ${CONDA_RUN} python -mpip install awscli==1.32.18

          mkdir -p artifacts
          pushd artifacts
          ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
          popd

          ls -lah artifacts

      - name: Extract the benchmark results JSON
        shell: bash
        env:
          # On pull requests github.head_ref is chosen by the PR author, so it is passed
          # through the environment instead of being interpolated into the script text.
          HEAD_BRANCH: ${{ github.head_ref || github.ref_name }}
        run: |
          set -eux

          mkdir -p benchmark-results

          for ARTIFACTS_BY_JOB in artifacts/*.json; do
            # An unmatched glob stays literal; stop when there is nothing to process
            [ -f "${ARTIFACTS_BY_JOB}" ] || break
            echo "${ARTIFACTS_BY_JOB}"
            ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
              --artifacts "${ARTIFACTS_BY_JOB}" \
              --output-dir benchmark-results \
              --repo ${{ github.repository }} \
              --head-branch "${HEAD_BRANCH}" \
              --workflow-name "${{ github.workflow }}" \
              --workflow-run-id ${{ github.run_id }} \
              --workflow-run-attempt ${{ github.run_attempt }}
          done

          for SCHEMA in v2 v3; do
            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
              # Same guard as above: without it `cat` would fail on the literal pattern
              # (and abort the step under `set -e`) when no results were produced
              [ -f "${BENCHMARK_RESULTS}" ] || continue
              cat "${BENCHMARK_RESULTS}"
              echo
            done
          done

      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
      - name: Upload the benchmark results (v2)
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: benchmark-results/v2
          dry-run: false
          schema-version: v2

      - name: Upload the benchmark results (v3)
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: benchmark-results/v3
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}