# .github/workflows/celerity_ci.yml

name: Celerity CI

on:
  pull_request:
  # CI runs on "push" only for master; on any other branch it would be
  # redundant with the "pull_request" trigger.
  push:
    branches:
      - master
  # Nightly builds determine whether CI passes for the "HEAD" revisions of
  # DPC++ and AdaptiveCpp. If so, these revisions are tagged as "latest" and
  # used for all subsequent CI runs.
  schedule:
    # Every night at 05:00 UTC
    - cron: "0 5 * * *"
  # Manually triggered runs choose whether to test against the "HEAD" revisions
  # and whether to tag them as "latest" afterwards.
  workflow_dispatch:
    inputs:
      test-head:
        type: boolean
        required: true
        default: false
        description: "Test against 'HEAD' revisions"
      tag-latest:
        type: boolean
        required: true
        default: false
        description: "Tag 'HEAD' revisions as 'latest' if successful"

jobs:
  # Run Clang-Tidy checks on the lines changed by a pull request and post the
  # findings as review comments.
  #
  # Note: This action currently only supports pull_request triggers (as it creates review comments)
  #
  # TODO: This should be combined with the report (or "lint") step, really
  clang-tidy:
    if: github.event.pull_request
    runs-on: [ self-hosted, slurm-intel ]
    env:
      # Overwritten by the first step of the job (see the comment there).
      container-workspace: <placeholder>
      build-dir: /root/build
    container:
      image: ghcr.io/celerity/celerity-lint
      volumes:
        - ccache:/ccache
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    steps:
      # Here and in jobs below: We need to manually set the container workspace
      # path as an environment variable, as (curiously) the `github.workspace` context
      # variable contains the path on the container host (but $GITHUB_WORKSPACE is correct).
      #
      # The line is appended (">>") to the environment file, which is the documented way
      # of writing to it; truncating it (">") would drop anything written before.
      - name: Set container workspace environment variable
        run: echo "container-workspace=$GITHUB_WORKSPACE" >> "$GITHUB_ENV"
      - uses: actions/checkout@v4
        with:
          submodules: true
      # The base branch name is handed to the shell through an environment variable
      # instead of being interpolated into the script text, so that unusual characters
      # in the ref can never be interpreted as shell syntax.
      - name: Fetch base branch
        env:
          BASE_REF: ${{ github.event.pull_request.base.ref }}
        run: git fetch --no-tags --no-recurse-submodules origin "$BASE_REF"
      # We only want to configure CMake, so we build the "help" target,
      # which doesn't actually do anything (other than print all targets).
      - name: Configure CMake
        run: bash -o pipefail -c "bash /root/build-celerity.sh ${{ env.container-workspace }} --build-type Debug --target help"
      - name: Run clang-tidy
        continue-on-error: true # clang-tidy-diff.py returns 1 if there are any errors; continue in that case (if something actually went wrong, there won't be a fixes file)
        env:
          BASE_REF: ${{ github.event.pull_request.base.ref }}
        # Only the lines that differ from the base branch are checked.
        # -j48 for gpuc5
        run: git diff -U0 "origin/$BASE_REF" | clang-tidy-diff.py -j48 -p1 -path ${{ env.build-dir }} -export-fixes clang-tidy-fixes.yml
        shell: bash # enables -o pipefail
      # Turns the exported fixes file into review comments on the pull request.
      - name: Create clang-tidy report comments
        uses: platisd/clang-tidy-pr-comments@v1
        with:
          python_path: python3 # Use our own Python installation
          github_token: ${{ secrets.GITHUB_TOKEN }}
          clang_tidy_fixes: "${{ env.container-workspace }}/clang-tidy-fixes.yml"
          repo_path_prefix: "/__w"

  # The build matrix depends on what triggered the workflow: normal CI runs build and test
  # against everything except the "HEAD" revisions, whereas nightly builds (and manual runs
  # with "test-head" enabled) build and test *only* those.
  #
  # As a workaround, both matrices live in a JSON file. This job selects one of them and
  # publishes it as a JSON string, which the next job deserializes with `fromJSON`.
  read-build-matrix:
    runs-on: self-hosted
    outputs:
      matrix: ${{ steps.read-json-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
      - name: Read build matrix from file
        id: read-json-matrix
        shell: python
        run: |
          import json
          import os

          # The expression inside the quotes is replaced with 'true' or 'false' before Python runs.
          use_default = '${{ github.event_name != 'schedule' && inputs.test-head == false }}' == 'true'

          with open("${{ github.workspace }}/.github/workflows/build_matrix.json") as matrix_file:
            all_matrices = json.load(matrix_file)
          selected = all_matrices['default' if use_default else 'nightly']

          # Publish the selected matrix as the `matrix` output of this step.
          with open(os.environ['GITHUB_OUTPUT'], 'a') as output_file:
            output_file.write('matrix={ "include":%s }\n' % json.dumps(selected))

  # Build Celerity and its examples for every entry of the build matrix, then run the
  # unit, example, debugging, system and integration tests inside the matching build image.
  build-and-test:
    needs: read-build-matrix
    runs-on: [ self-hosted, "slurm-${{ matrix.platform }}" ]
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.read-build-matrix.outputs.matrix) }}
    # These outputs are required by the image tagging step, only set during nightly builds.
    outputs:
      dpcpp-HEAD-Debug-works: ${{ steps.set-head-results.outputs.dpcpp-HEAD-Debug-works }}
      dpcpp-HEAD-Release-works: ${{ steps.set-head-results.outputs.dpcpp-HEAD-Release-works }}
      dpcpp-HEAD-ubuntu-version: ${{ steps.set-head-results.outputs.dpcpp-HEAD-ubuntu-version }}
      acpp-HEAD-Debug-works: ${{ steps.set-head-results.outputs.acpp-HEAD-Debug-works }}
      acpp-HEAD-Release-works: ${{ steps.set-head-results.outputs.acpp-HEAD-Release-works }}
      acpp-HEAD-ubuntu-version: ${{ steps.set-head-results.outputs.acpp-HEAD-ubuntu-version }}
    env:
      # Unique per matrix entry; used for the build log file name and the artifact names.
      build-name: ${{ matrix.platform }}-ubuntu${{ matrix.ubuntu-version }}-${{ matrix.sycl }}-${{ matrix.sycl-version }}-${{ matrix.build-type }}
      # Overwritten by the first step of the job with the value of $GITHUB_WORKSPACE.
      container-workspace: <placeholder>
      build-dir: /root/build
      examples-build-dir: /root/build-examples
    container:
      image: ghcr.io/celerity/celerity-build/${{ matrix.sycl }}:ubuntu${{ matrix.ubuntu-version }}-${{ matrix.sycl-version }}
      volumes:
        - ccache:/ccache
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    steps:
      # We limit DPC++ to Level-Zero devices (instead of e.g. also showing OpenCL or CUDA devices).
      # Most importantly, despite its naming, this currently also avoids the "Unified Runtime over Level-Zero", which
      # doesn't seem to be quite ready for production use yet (= it crashes).
      #
      # Every line is appended (">>") to the environment file, so the order of the lines
      # does not matter and none of them can wipe out another.
      - name: Set environment variables
        run: |
          echo "container-workspace=$GITHUB_WORKSPACE" >> "$GITHUB_ENV"
          echo "ONEAPI_DEVICE_SELECTOR=level_zero:*" >> "$GITHUB_ENV"
          echo "SIMSYCL_SYSTEM=$GITHUB_WORKSPACE/ci/simsycl-system.json" >> "$GITHUB_ENV"
      - name: Print exact SYCL revision used for this CI run
        run: cat /VERSION
      - uses: actions/checkout@v4
        with:
          submodules: true
      # In SimSYCL images, /root/celerity-options.sh automatically enables coverage for Debug builds, and Tracy support for Release builds
      - name: Build and install Celerity
        run: bash -o pipefail -c "bash /root/build-celerity.sh ${{ env.container-workspace }} --build-type ${{ matrix.build-type }} --mpi ${{ matrix.mpi }} --target install 2>&1 | tee ${{ env.build-name }}.log"
      # Upload build log for report step
      - name: Upload build logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.build-name }}
          path: ${{ env.build-name }}.log
      - name: Build examples against installed Celerity
        run: bash /root/build-examples.sh ${{ env.container-workspace }}/examples --build-type ${{ matrix.build-type }}
      - name: Reset coverage counters
        if: matrix.build-type == 'Debug' && matrix.sycl == 'simsycl' # celerity-build/simsycl image enables --coverage for Debug builds
        run: fastcov -z -d "${{ env.build-dir }}"
      # For a run with ASan, we need to fake dlclose() so all shared libraries are still mapped when leaks are evaluated
      # - otherwise, we will see "<unknown module>" instead of a proper trace if it originated in an unloaded library
      # (see https://github.com/google/sanitizers/issues/89#issuecomment-406316683). LD_PRELOAD will be set to CI_LD_PRELOAD
      # by /root/capture-backtrace.sh (otherwise we would also preload for other processes like bash, which we don't want to do).
      # Also, hwloc (used by OpenMPI) will attempt to dlopen(RTLD_DEEPBIND) for GPU backend discovery which is incompatible
      # with sanitizers but also irrelevant for a SimSYCL build, so we set HWLOC_COMPONENTS accordingly.
      - name: Prepare Sanitizer run
        if: matrix.sycl == 'simsycl' && matrix.build-type == 'Debug'
        run: |
          echo "ASAN_OPTIONS=detect_leaks=1" >> "$GITHUB_ENV"
          echo "LSAN_OPTIONS=suppressions=$GITHUB_WORKSPACE/ci/lsan.suppressions" >> "$GITHUB_ENV"
          echo "int dlclose(void *p) { return 0; }" | cc -x c -shared -o libfake_dlclose.so -
          echo "CI_LD_PRELOAD=$(echo /usr/lib/x86_64-linux-gnu/libasan.so.?):$(pwd)/libfake_dlclose.so" >> "$GITHUB_ENV"
          echo "HWLOC_COMPONENTS=-cuda,-opencl,-rsmi" >> "$GITHUB_ENV"
      - name: Run unit tests
        timeout-minutes: 5
        working-directory: ${{ env.build-dir }}
        run: ${{ env.container-workspace }}/ci/run-unit-tests.sh
      - name: Run examples (single-node)
        timeout-minutes: 10
        # We build examples twice, but only run the installed version (which probably has more failure modes)
        working-directory: ${{ env.examples-build-dir }}
        run: ${{ env.container-workspace }}/ci/run-examples.sh /data/Lenna.png 1
      - name: Run examples (multi-node)
        if: ${{ matrix.mpi }}
        timeout-minutes: 10
        working-directory: ${{ env.examples-build-dir }}
        run: ${{ env.container-workspace }}/ci/run-examples.sh /data/Lenna.png 2 4
      - name: Run debugging tests
        if: matrix.build-type == 'Debug' && matrix.sycl != 'dpcpp' # newer DPC++ generates DWARF5 which is incompatible with Ubuntu 20.04's GDB
        run: ${{ env.container-workspace }}/test/debug/pretty-print-test.py ${{ env.build-dir }}/test/debug/pretty_printables
      - name: Run system tests
        if: ${{ matrix.mpi }}
        working-directory: ${{ env.build-dir }}
        run: ${{ env.container-workspace }}/ci/run-system-tests.sh 2 4
      - name: Run integration tests
        working-directory: ${{ env.build-dir }}
        run: ${{ env.container-workspace }}/test/integration/run-integration-tests.py . ${{ matrix.platform }}
      # Artifacts created by upload-artifact@v4 are immutable, so the traces cannot be added
      # to the artifact that already holds the build log; they get an artifact of their own.
      # NOTE(review): confirm that the report action copes with the additional "-traces" artifacts.
      - name: Upload stack traces (if any)
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.build-name }}-traces
          path: |
            ${{ env.build-dir }}/*.trace
            ${{ env.examples-build-dir }}/*.trace
          if-no-files-found: ignore
      # Only reached if all previous steps succeeded, i.e. the "HEAD" revision built and passed its tests.
      - id: set-head-results
        name: Set outputs for HEAD builds
        if: matrix.sycl-version == 'HEAD'
        run: |
          echo "${{ matrix.sycl }}-HEAD-${{ matrix.build-type }}-works=1" >> "$GITHUB_OUTPUT"
          echo "${{ matrix.sycl }}-HEAD-ubuntu-version=${{ matrix.ubuntu-version }}" >> "$GITHUB_OUTPUT"
      - name: Collect & report coverage
        if: matrix.build-type == 'Debug' && matrix.sycl == 'simsycl' # celerity-build/simsycl image enables --coverage for Debug builds
        env:
          COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
        run: |
          "${{ env.container-workspace }}/ci/capture-coverage.sh" "${{ env.container-workspace }}" "${{ env.build-dir }}" -o lcov.info
          coveralls report lcov.info

  # Promote "HEAD" images that built and tested successfully to "latest".
  # This only happens for nightly builds (or for manually triggered runs that set the "tag-latest" option).
  tag-latest-containers:
    needs: build-and-test
    # `always()` makes this job run regardless of the result of `build-and-test`:
    # images that were successful should be tagged even if others weren't.
    if: always() && (github.event_name == 'schedule' || (inputs.test-head && inputs.tag-latest))
    runs-on: slurm-${{ matrix.platform }}
    permissions:
      packages: write
    strategy:
      fail-fast: false
      matrix:
        include:
          - platform: "intel"
            sycl: "dpcpp"
          - platform: "nvidia"
            sycl: "acpp"
    env:
      image-basename-dpcpp: ghcr.io/celerity/celerity-build/dpcpp:ubuntu${{ needs.build-and-test.outputs.dpcpp-HEAD-ubuntu-version }}
      image-basename-acpp: ghcr.io/celerity/celerity-build/acpp:ubuntu${{ needs.build-and-test.outputs.acpp-HEAD-ubuntu-version }}
    steps:
      - name: Log into Container registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Tag DPC++ HEAD image as latest
        if: matrix.sycl == 'dpcpp'
        env:
          DEBUG_WORKS: ${{ needs.build-and-test.outputs.dpcpp-HEAD-Debug-works }}
          RELEASE_WORKS: ${{ needs.build-and-test.outputs.dpcpp-HEAD-Release-works }}
          IMAGE: ${{ env.image-basename-dpcpp }}
        run: |
          # Fail the step unless both the Debug and the Release build reported success.
          if [[ "$DEBUG_WORKS" -ne 1 ]] || [[ "$RELEASE_WORKS" -ne 1 ]]; then
            exit 1
          fi
          docker tag "${IMAGE}-HEAD" "${IMAGE}-latest"
          docker push "${IMAGE}-latest"
      - name: Tag AdaptiveCpp HEAD image as latest
        if: matrix.sycl == 'acpp'
        env:
          DEBUG_WORKS: ${{ needs.build-and-test.outputs.acpp-HEAD-Debug-works }}
          RELEASE_WORKS: ${{ needs.build-and-test.outputs.acpp-HEAD-Release-works }}
          IMAGE: ${{ env.image-basename-acpp }}
        run: |
          # Fail the step unless both the Debug and the Release build reported success.
          if [[ "$DEBUG_WORKS" -ne 1 ]] || [[ "$RELEASE_WORKS" -ne 1 ]]; then
            exit 1
          fi
          docker tag "${IMAGE}-HEAD" "${IMAGE}-latest"
          docker push "${IMAGE}-latest"

  # Check the code formatting and hand the list of unformatted files to the CI report action.
  # Runs after `build-and-test`.
  report:
    needs: [build-and-test]
    runs-on: [ self-hosted, slurm ]
    env:
      # Overwritten by the first step of the job with the value of $GITHUB_WORKSPACE.
      container-workspace: <placeholder>
    container:
      image: ghcr.io/celerity/celerity-lint
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    steps:
      # The line is appended (">>") to the environment file, which is the documented way
      # of writing to it; truncating it (">") would drop anything written before.
      - name: Set container workspace environment variable
        run: echo "container-workspace=$GITHUB_WORKSPACE" >> "$GITHUB_ENV"
      - uses: actions/checkout@v4
      - name: Check code formatting
        id: formatting
        working-directory: ${{ env.container-workspace }}
        shell: bash
        run: |
          unformatted=$("./ci/find-unformatted-files.sh")
          # The script output may span several lines, so it is written as a
          # multi-line output value delimited by "EOF".
          {
            echo 'unformatted-files<<EOF'
            echo "$unformatted"
            echo EOF
          } >> "$GITHUB_OUTPUT"
      - uses: "celerity/ci-report-action@v8"
        with:
          gh-token: ${{ secrets.GITHUB_TOKEN }}
          unformatted-files: ${{ steps.formatting.outputs.unformatted-files }}