Skip to content

Add CI for GPU tests on Ruche #5

Add CI for GPU tests on Ruche

Add CI for GPU tests on Ruche #5

Workflow file for this run

name: Build and test
on:
pull_request:
branches:
- main
env:
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
BUILD_TYPE: RelWithDebInfo
# Force the use of BuildKit for Docker
DOCKER_BUILDKIT: 1
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: DoozyX/[email protected]
with:
source: 'common/ fft/ examples/'
exclude: ''
extensions: 'hpp,cpp'
clangFormatVersion: 12
check_docker_files:
runs-on: ubuntu-latest
outputs:
# true if any Docker file was modified in the PR (PR mode) or since last pushed commit (push mode)
docker_files_have_changed: ${{ steps.get_changed_docker_files.outputs.any_changed == 'true' }}
# use "pr" as image name suffix if on PR mode and if any Docker file was modified, otherwise use "main"
# this is intended to avoid a PR test to alter Docker images for other PRs or for the main branch
image_name_suffix: ${{ steps.get_changed_docker_files.outputs.any_changed == 'true' && github.event_name == 'pull_request' && 'pr' || 'main' }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Get changed Dockerfiles
id: get_changed_docker_files
uses: tj-actions/changed-files@v42
with:
files: docker/**/Dockerfile
- name: List changed Dockerfiles
if: ${{ steps.docker_files_have_changed.outputs.any_changed == 'true' }}
env:
ALL_CHANGED_FILES: ${{ steps.docker_files_have_changed.outputs.all_changed_files }}
run: |
for file in "$ALL_CHANGED_FILES"; do
echo "$file was changed"
done
build_base:
needs: check_docker_files
if: ${{ needs.check_docker_files.outputs.docker_files_have_changed == 'true' }}
uses: ./.github/workflows/__build_base.yaml
with:
image_name_suffix: ${{ needs.check_docker_files.outputs.image_name_suffix }}
build:
runs-on: ubuntu-latest
needs:
- check_docker_files
- build_base
# run this job even if build_base did not run
if: ${{ ! cancelled() && (needs.build_base.result == 'success' || needs.build_base.result == 'skipped') }}
strategy:
matrix:
backend:
- name: openmp
c_compiler: gcc
cxx_compiler: g++
cmake_flags: -DKokkos_ENABLE_OPENMP=ON
- name: cuda
c_compiler: gcc
cxx_compiler: g++
cmake_flags: -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON
- name: hip
c_compiler: hipcc
cxx_compiler: hipcc
cmake_flags: -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_VEGA90A=ON
- name: sycl
c_compiler: icx
cxx_compiler: icpx
cmake_flags: -DKokkos_ENABLE_SYCL=ON -DKokkos_ARCH_INTEL_GEN=ON
target:
- name: native
cmake_flags: ""
- name: host_device
cmake_flags: -DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON
exclude:
- backend:
name: openmp
target:
name: host_device
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/[email protected]
with:
tool-cache: true
large-packages: false
- name: Checkout built branch
uses: actions/checkout@v4
with:
submodules: recursive
- name: Configure
run: |
docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }}_${{ needs.check_docker_files.outputs.image_name_suffix }} \
cmake -B build \
-DCMAKE_INSTALL_PREFIX=/work/install \
-DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} \
-DCMAKE_C_COMPILER=${{ matrix.backend.c_compiler }} \
-DCMAKE_CXX_COMPILER=${{ matrix.backend.cxx_compiler }} \
-DCMAKE_CXX_STANDARD=17 \
-DBUILD_TESTING=ON \
-DKokkosFFT_INTERNAL_Kokkos=ON \
${{ matrix.backend.cmake_flags }} \
${{ matrix.target.cmake_flags }}
- name: Build
run: |
docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }}_${{ needs.check_docker_files.outputs.image_name_suffix }} \
cmake --build build -j $(( $(nproc) * 2 + 1 ))
- name: Prepare artifacts
# this is mandatory to preserve execution rights
run: tar -cvf tests_${{ matrix.backend.name }}.tar build/
if: ${{ matrix.target.name == 'native' }}
- name: Save artifacts
# use v3 as more recent versions cannot run on Ruche
uses: actions/upload-artifact@v3
with:
name: tests_${{ matrix.backend.name }}
path: tests_${{ matrix.backend.name }}.tar
if: ${{ matrix.target.name == 'native' }}
- name: Install
run: |
docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }}_${{ needs.check_docker_files.outputs.image_name_suffix }} \
cmake --install build
- name: Configure and build test code
run: |
docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }}_${{ needs.check_docker_files.outputs.image_name_suffix }} \
cmake -B build_test \
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} \
-DCMAKE_C_COMPILER=${{ matrix.backend.c_compiler }} \
-DCMAKE_CXX_COMPILER=${{ matrix.backend.cxx_compiler }} \
-DCMAKE_CXX_STANDARD=17 \
-DCMAKE_PREFIX_PATH=/work/install \
install_test
docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }}_${{ needs.check_docker_files.outputs.image_name_suffix }} \
cmake --build build_test -j $(( $(nproc) * 2 + 1 ))
test:
runs-on: ${{ matrix.backend.runner }}
needs:
- check_docker_files
- build
# run this job even if build_base did not run
if: ${{ ! cancelled() && needs.build.result == 'success' }}
strategy:
matrix:
backend:
# run CUDA tests on Ruche supercomputer
- name: cuda
runner: [self-hosted, cuda]
# run OpenMP tests on Azure server
- name: openmp
runner: ubuntu-latest
steps:
- name: Get artifacts
# use v3 as more recent versions cannot run on Ruche
uses: actions/download-artifact@v3
with:
name: tests_${{ matrix.backend.name }}
- name: Deploy artifacts
run: tar -xvf tests_${{ matrix.backend.name }}.tar
- name: Run CUDA tests within Slurm job and Singularity image
run: |
# pulling the image in advance seems necessary as sometimes invoking `singularity run` on the image URL fails because it cannot find ghcr.io
singularity pull oras://ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }}_singularity_${{ needs.check_docker_files.outputs.image_name_suffix }}:latest
# actual test on a GPU node
srun --nodes=1 --time=01:00:00 -p gpua100 --gres=gpu:1 \
singularity run --nv --bind $PWD/build:/work/build -H /work/build base_${{ matrix.backend.name }}_singularity_${{ needs.check_docker_files.outputs.image_name_suffix }}_latest.sif \
ctest
if: ${{ matrix.backend.name == 'cuda' }}
- name: Run OpenMP tests within Docker image
run: |
docker run -v $PWD/build:/work/build -w /work/build ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }}_${{ needs.check_docker_files.outputs.image_name_suffix }} \
ctest
if: ${{ matrix.backend.name == 'openmp' }}