# .github/workflows/llm-binary-build.yml

name: LLM Binary Build

# Cancel previous runs in the PR when you push new commits
# concurrency:
#   group: ${{ github.workflow }}-llm-binary-build-${{ github.event.pull_request.number || github.run_id }}
#   cancel-in-progress: false

# Limit the workflow token to read-only access to repository contents.
permissions:
  contents: read

# Controls when the action will run.
on:
  # The push / pull_request / workflow_dispatch triggers below are disabled;
  # this workflow currently runs only when called from another workflow.
  # push:
  #   branches: [main]
  #   paths:
  #     - ".github/workflows/llm-binary-build.yml"
  # pull_request:
  #   branches: [main]
  #   paths:
  #     - ".github/workflows/llm-binary-build.yml"
  # workflow_dispatch:
  #   inputs:
  #     llmcpp-ref:
  #       description: 'Ref of llm.cpp code'
  #       default: ''
  #       required: false
  #       type: string
  #     platform:
  #       description: 'Platforms to build on'
  #       default: '["Windows", "Linux"]'
  #       required: false
  #       type: string
  workflow_call:
    inputs:
      # Used as the `ref` of every actions/checkout step that fetches
      # intel-analytics/llm.cpp.
      llmcpp-ref:
        description: 'Ref of llm.cpp code'
        default: ''
        required: false
        type: string
      # Jobs test this value with contains(), i.e. a substring match on
      # 'Linux' / 'Windows'. The exact value 'Dummy' selects only the
      # placeholder job.
      platform:
        description: 'Platforms to build on'
        default: 'Windows,Linux'
        required: false
        type: string
    
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # Asks xSAVIKx/artifact-exists-action about the `linux-avxvnni` artifact.
  # The dependent build job runs only when the reported value is 'false'.
  check-linux-avxvnni-artifact:
    if: ${{ contains(inputs.platform, 'Linux') }}
    runs-on:
      - Shire
    outputs:
      # Re-exports the `exists` output of the check_artifact step.
      if-exists: ${{ steps.check_artifact.outputs.exists }}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: linux-avxvnni

  # Builds llm.cpp on Linux with the default CMake configuration and uploads
  # the result as the `linux-avxvnni` artifact. Runs only when the check job
  # reported that the artifact does not exist.
  linux-build-avxvnni:
    runs-on: [self-hosted, AVX2, almalinux8]
    needs: check-linux-avxvnni-artifact
    if: needs.check-linux-avxvnni-artifact.outputs.if-exists == 'false'
    steps:
      # Copies GITHUB_ACCESS_TOKEN from the runner's environment into the job
      # environment so the checkout step can read it through the `env` context.
      - name: Set access token
        run: |
          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
      - name: Install Build Environment
        shell: bash
        run: |
          export http_proxy=${HTTP_PROXY}
          export https_proxy=${HTTPS_PROXY}
          yum install --nogpgcheck -y gcc-toolset-11 cmake git
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      # Configure and build inside the gcc-toolset-11 software collection.
      - name: Build binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      # Executables and *-api libraries keep their names; the core model
      # libraries are renamed with an `_avxvnni` suffix.
      - name: Move release binary
        shell: bash
        run: |
          mkdir release
          mv build/main-bloom release/main-bloom
          mv build/libbloom-api.so release/libbloom-api.so
          mv build/quantize-bloom release/quantize-bloom
          mv build/libbloom.so release/libbloom_avxvnni.so
          mv build/main-llama release/main-llama
          mv build/libllama-api.so release/libllama-api.so
          mv build/quantize-llama release/quantize-llama
          mv build/libllama.so release/libllama_avxvnni.so
          mv build/main-gptneox release/main-gptneox
          mv build/libgptneox-api.so release/libgptneox-api.so
          mv build/quantize-gptneox release/quantize-gptneox
          mv build/libgptneox.so release/libgptneox_avxvnni.so
          mv build/main-starcoder release/main-starcoder
          mv build/libstarcoder-api.so release/libstarcoder-api.so
          mv build/quantize-starcoder release/quantize-starcoder
          mv build/libstarcoder.so release/libstarcoder_avxvnni.so
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-avxvnni
          path: |
            release
      # Runs even when an earlier step failed so the self-hosted runner's
      # workspace is not left with stale build output.
      - name: Clean up test environment
        if: ${{ always() }}
        shell: bash
        run: |
          make clean

  # Asks xSAVIKx/artifact-exists-action about the `linux-avx512` artifact.
  # The dependent build job runs only when the reported value is 'false'.
  check-linux-avx512-artifact:
    if: ${{ contains(inputs.platform, 'Linux') }}
    runs-on:
      - Shire
    outputs:
      # Re-exports the `exists` output of the check_artifact step.
      if-exists: ${{ steps.check_artifact.outputs.exists }}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: linux-avx512

  # Builds llm.cpp three times in the same `build` directory (avx512, avx2,
  # avx) by toggling ONLYAVX / ONLYAVX2, and uploads each result as its own
  # artifact: `linux-avx512`, `linux-avx2`, `linux-avx`.
  linux-build-avx512:
    runs-on:
      - self-hosted
      - AVX512
      - almalinux8
    needs: check-linux-avx512-artifact
    if: needs.check-linux-avx512-artifact.outputs.if-exists == 'false'
    steps:
      # Copies GITHUB_ACCESS_TOKEN from the runner's environment into the job
      # environment so the checkout step can read it through the `env` context.
      - name: Set access token
        run: |
          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
      # NOTE(review): the python39 conda env is recreated here but no later
      # step in this job references it; confirm it is still needed.
      - name: Install Build Environment
        shell: bash
        run: |
          export http_proxy=${HTTP_PROXY}
          export https_proxy=${HTTPS_PROXY}
          yum install --nogpgcheck -y gcc-toolset-11 cmake git
          conda remove -n python39 --all -y
          conda create -n python39 python=3.9 -y
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"

      # --- avx512 variant: both ONLYAVX and ONLYAVX2 switched off ---
      - name: Build avx512 binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -DONLYAVX=OFF -DONLYAVX2=OFF -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      # Quantize tools and core libraries are renamed with an `_avx512` suffix.
      - name: Move avx512 release binary
        shell: bash
        run: |
          mkdir avx512_release
          mv build/quantize-bloom avx512_release/quantize-bloom_avx512
          mv build/libbloom.so avx512_release/libbloom_avx512.so
          mv build/quantize-llama avx512_release/quantize-llama_avx512
          mv build/libllama.so avx512_release/libllama_avx512.so
          mv build/quantize-gptneox avx512_release/quantize-gptneox_avx512
          mv build/libgptneox.so avx512_release/libgptneox_avx512.so
          mv build/quantize-starcoder avx512_release/quantize-starcoder_avx512
          mv build/libstarcoder.so avx512_release/libstarcoder_avx512.so

      # --- avx2 variant: ONLYAVX2 switched on; only the libraries are kept ---
      - name: Build avx2 binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -DONLYAVX=OFF -DONLYAVX2=ON -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move avx2 release binary
        shell: bash
        run: |
          mkdir avx2_release
          mv build/libbloom.so avx2_release/libbloom_avx2.so
          mv build/libllama.so avx2_release/libllama_avx2.so
          mv build/libgptneox.so avx2_release/libgptneox_avx2.so
          mv build/libstarcoder.so avx2_release/libstarcoder_avx2.so

      # --- avx variant: ONLYAVX switched on; only the libraries are kept ---
      - name: Build avx binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -DONLYAVX=ON -DONLYAVX2=OFF -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      - name: Move avx release binary
        shell: bash
        run: |
          mkdir avx_release
          mv build/libbloom.so avx_release/libbloom_avx.so
          mv build/libllama.so avx_release/libllama_avx.so
          mv build/libgptneox.so avx_release/libgptneox_avx.so
          mv build/libstarcoder.so avx_release/libstarcoder_avx.so

      # One artifact per variant.
      - name: Archive avx512 build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-avx512
          path: |
            avx512_release
      - name: Archive avx2 build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-avx2
          path: |
            avx2_release
      - name: Archive avx build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-avx
          path: |
            avx_release
      # Executed regardless of the outcome of the preceding steps.
      - name: Clean up test environment
        if: always()
        shell: bash
        run: |
          make clean
          conda remove -n python39 --all -y

  # Asks xSAVIKx/artifact-exists-action about the `linux-amx` artifact.
  # The dependent build job runs only when the reported value is 'false'.
  check-linux-amx-artifact:
    if: ${{ contains(inputs.platform, 'Linux') }}
    runs-on:
      - Shire
    outputs:
      # Re-exports the `exists` output of the check_artifact step.
      if-exists: ${{ steps.check_artifact.outputs.exists }}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: linux-amx

  # Builds llm.cpp on an amx-labelled runner with ONLYAVX and ONLYAVX2 both
  # off, and uploads the result as the `linux-amx` artifact. Runs only when
  # the check job reported that the artifact does not exist.
  linux-build-amx:
    runs-on: [self-hosted, amx, almalinux8]
    needs: check-linux-amx-artifact
    if: needs.check-linux-amx-artifact.outputs.if-exists == 'false'
    steps:
      # Copies GITHUB_ACCESS_TOKEN from the runner's environment into the job
      # environment so the checkout step can read it through the `env` context.
      - name: Set access token
        run: |
          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
      - name: Install Build Environment
        shell: bash
        run: |
          export http_proxy=${HTTP_PROXY}
          export https_proxy=${HTTPS_PROXY}
          yum install --nogpgcheck -y gcc-toolset-11 cmake git
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      # Configure and build inside the gcc-toolset-11 software collection.
      - name: Build amx binary
        shell: bash
        run: |
          scl enable gcc-toolset-11 "cmake -DONLYAVX=OFF -DONLYAVX2=OFF -B build"
          scl enable gcc-toolset-11 "cmake --build build --config Release -j"
      # Quantize tools and core libraries are renamed with an `_amx` suffix.
      - name: Move amx release binary
        shell: bash
        run: |
          mkdir amx_release
          mv build/quantize-bloom amx_release/quantize-bloom_amx
          mv build/libbloom.so amx_release/libbloom_amx.so
          mv build/quantize-llama amx_release/quantize-llama_amx
          mv build/libllama.so amx_release/libllama_amx.so
          mv build/quantize-gptneox amx_release/quantize-gptneox_amx
          mv build/libgptneox.so amx_release/libgptneox_amx.so
          mv build/quantize-starcoder amx_release/quantize-starcoder_amx
          mv build/libstarcoder.so amx_release/libstarcoder_amx.so
      - name: Archive amx build files
        uses: actions/upload-artifact@v3
        with:
          name: linux-amx
          path: |
            amx_release
      # Runs even when an earlier step failed so the self-hosted runner's
      # workspace is not left with stale build output.
      - name: Clean up test environment
        if: ${{ always() }}
        shell: bash
        run: |
          make clean
          
  # Asks xSAVIKx/artifact-exists-action about the `windows-avx2` artifact.
  # The dependent build job runs only when the reported value is 'false'.
  check-windows-avx2-artifact:
    if: ${{ contains(inputs.platform, 'Windows') }}
    runs-on:
      - Shire
    outputs:
      # Re-exports the `exists` output of the check_artifact step.
      if-exists: ${{ steps.check_artifact.outputs.exists }}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: windows-avx2

  # Builds llm.cpp on Windows with the default CMake configuration and uploads
  # `build/Release` as the `windows-avx2` artifact. Runs only when the check
  # job reported that the artifact does not exist.
  windows-build-avx2:
    runs-on: [self-hosted, Windows, AVX-VNNI-Build]
    needs: check-windows-avx2-artifact
    if: needs.check-windows-avx2-artifact.outputs.if-exists == 'false'
    steps:
      # Copies GITHUB_ACCESS_TOKEN from the runner's environment into the job
      # environment so the checkout step can read it through the `env` context.
      # The value must never be echoed: it would appear in the job log.
      - name: Set access token
        run: |
          echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $env:GITHUB_ENV
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
          msbuild-architecture: x64
      - name: Add cmake to PATH
        uses: ilammy/msvc-dev-cmd@v1
      # In-source configure and Release build.
      - name: Build binary
        shell: powershell
        run: |
          cmake .
          cmake --build . --config Release -j
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: windows-avx2
          path: |
            build/Release

  # Asks xSAVIKx/artifact-exists-action about the `windows-avx-vnni` artifact.
  # The dependent build job runs only when the reported value is 'false'.
  check-windows-avx-vnni-artifact:
    if: ${{ contains(inputs.platform, 'Windows') }}
    runs-on:
      - Shire
    outputs:
      # Re-exports the `exists` output of the check_artifact step.
      if-exists: ${{ steps.check_artifact.outputs.exists }}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: windows-avx-vnni

  # Builds llm.cpp on Windows with -DAVXVNNI=ON and uploads the renamed
  # binaries as the `windows-avx-vnni` artifact. Runs only when the check job
  # reported that the artifact does not exist.
  windows-build-avx-vnni:
    runs-on: [self-hosted, Windows, AVX-VNNI-Build]
    needs: check-windows-avx-vnni-artifact
    if: needs.check-windows-avx-vnni-artifact.outputs.if-exists == 'false'
    steps:
      # Copies GITHUB_ACCESS_TOKEN from the runner's environment into the job
      # environment so the checkout step can read it through the `env` context.
      # The value must never be echoed: it would appear in the job log.
      - name: Set access token
        run: |
          echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $env:GITHUB_ENV
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
          msbuild-architecture: x64
      - name: Add cmake to PATH
        uses: ilammy/msvc-dev-cmd@v1
      # In-source configure with AVX-VNNI enabled, then Release build.
      - name: Build binary
        shell: powershell
        run: |
          cmake -DAVXVNNI=ON .
          cmake --build . --config Release -j
      # Recreates `release` from scratch; quantize tools get a `_vnni` suffix
      # and DLLs are renamed to lib<model>_vnni.dll. The main-* executables are
      # intentionally left out (their moves are commented out in the script).
      - name: Move release binary
        shell: powershell
        run: |
          if (Test-Path ./release) { rm -r -fo release }
          mkdir release
          # mv build/Release/main-bloom.exe release/main-bloom_vnni.exe
          mv build/Release/quantize-bloom.exe release/quantize-bloom_vnni.exe
          mv build/Release/bloom.dll release/libbloom_vnni.dll

          # mv build/Release/main-llama.exe release/main-llama_vnni.exe
          mv build/Release/quantize-llama.exe release/quantize-llama_vnni.exe
          mv build/Release/llama.dll release/libllama_vnni.dll

          # mv build/Release/main-gptneox.exe release/main-gptneox_vnni.exe
          mv build/Release/quantize-gptneox.exe release/quantize-gptneox_vnni.exe
          mv build/Release/gptneox.dll release/libgptneox_vnni.dll

          # mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
          mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
          mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: windows-avx-vnni
          path: |
            release

  # Asks xSAVIKx/artifact-exists-action about the `windows-avx` artifact.
  # The dependent build job runs only when the reported value is 'false'.
  check-windows-avx-artifact:
    if: ${{ contains(inputs.platform, 'Windows') }}
    runs-on:
      - Shire
    outputs:
      # Re-exports the `exists` output of the check_artifact step.
      if-exists: ${{ steps.check_artifact.outputs.exists }}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: windows-avx

  # Builds llm.cpp on Windows with -DONLYAVX=ON and uploads the renamed DLLs
  # as the `windows-avx` artifact. Runs only when the check job reported that
  # the artifact does not exist.
  windows-build-avx:
    runs-on: [self-hosted, Windows, AVX-VNNI-Build]
    needs: check-windows-avx-artifact
    if: needs.check-windows-avx-artifact.outputs.if-exists == 'false'
    steps:
      # Copies GITHUB_ACCESS_TOKEN from the runner's environment into the job
      # environment so the checkout step can read it through the `env` context.
      # The value must never be echoed: it would appear in the job log.
      - name: Set access token
        run: |
          echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $env:GITHUB_ENV
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
          msbuild-architecture: x64
      - name: Add cmake to PATH
        uses: ilammy/msvc-dev-cmd@v1
      # In-source configure restricted to AVX, then Release build.
      - name: Build binary
        shell: powershell
        run: |
          cmake -DONLYAVX=ON .
          cmake --build . --config Release -j
      # Recreates `release` from scratch; only the DLLs are kept, renamed to
      # lib<model>_avx.dll.
      - name: Move release binary
        shell: powershell
        run: |
          if (Test-Path ./release) { rm -r -fo release }
          mkdir release
          mv build/Release/bloom.dll release/libbloom_avx.dll

          mv build/Release/llama.dll release/libllama_avx.dll

          mv build/Release/gptneox.dll release/libgptneox_avx.dll

          mv build/Release/starcoder.dll release/libstarcoder_avx.dll
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: windows-avx
          path: |
            release

  # Asks xSAVIKx/artifact-exists-action about the `windows-npu-level0`
  # artifact. The dependent build job runs only when the reported value is
  # 'false'.
  check-windows-npu-level0-artifact:
    if: ${{ contains(inputs.platform, 'Windows') }}
    runs-on:
      - Shire
    outputs:
      # Re-exports the `exists` output of the check_artifact step.
      if-exists: ${{ steps.check_artifact.outputs.exists }}
    steps:
      - name: Check if built
        id: check_artifact
        uses: xSAVIKx/artifact-exists-action@v0
        with:
          name: windows-npu-level0

  # Builds the `pipeline` target inside bigdl-core-npu-level0 and uploads
  # pipeline.dll as the `windows-npu-level0` artifact. Runs only when the
  # check job reported that the artifact does not exist.
  windows-build-npu-level0:
    runs-on: [self-hosted, Windows, npu-level0]
    needs: check-windows-npu-level0-artifact
    if: needs.check-windows-npu-level0-artifact.outputs.if-exists == 'false'
    steps:
      # Copies GITHUB_ACCESS_TOKEN from the runner's environment into the job
      # environment so the checkout step can read it through the `env` context.
      # The value must never be echoed: it would appear in the job log.
      - name: Set access token
        run: |
          echo "github_access_token=$env:GITHUB_ACCESS_TOKEN" >> $env:GITHUB_ENV
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          ref: ${{ inputs.llmcpp-ref }}
          token: ${{ env.github_access_token }}
          submodules: "recursive"
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
          msbuild-architecture: x64
      - name: Add cmake to PATH
        uses: ilammy/msvc-dev-cmd@v1
      # Out-of-source build in bigdl-core-npu-level0/build; only the
      # `pipeline` target is built.
      - name: Build binary
        shell: powershell
        run: |
          cd bigdl-core-npu-level0
          mkdir build
          cd build
          cmake ..
          cmake --build . --config Release -t pipeline
      # Recreates bigdl-core-npu-level0/release from scratch with the DLL.
      - name: Move release binary
        shell: powershell
        run: |
          cd bigdl-core-npu-level0
          if (Test-Path ./release) { rm -r -fo release }
          mkdir release
          mv build/Release/pipeline.dll release/pipeline.dll
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
          name: windows-npu-level0
          path: |
            bigdl-core-npu-level0/release


  # Placeholder job that makes llm-binary-build optionally skippable: passing
  # platform 'Dummy' matches none of the Linux/Windows job conditions, so
  # only this job runs.
  dummy-step:
    if: inputs.platform == 'Dummy'
    runs-on: ubuntu-latest
    steps:
      - name: dummy echo
        run: echo "dummy step"