Skip to content

Commit

Permalink
support different network interface tests
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexCheema committed Dec 17, 2024
1 parent 2f0b543 commit 023ddc2
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 14 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/bench_job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ on:
calling_job_name:
required: true
type: string
network_interface:
required: true
type: string
jobs:
generate-matrix:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -122,7 +125,7 @@ jobs:
sudo taskpolicy -d default -g default -a -t 0 -l 0 .venv/bin/exo \
--node-id="${MY_NODE_ID}" \
--node-id-filter="${ALL_NODE_IDS}" \
--interface-type-filter="Ethernet" \
--interface-type-filter="${{ inputs.network_interface }}" \
--disable-tui \
--max-generate-tokens 250 \
--chatgpt-api-port 52415 > output1.log 2>&1 &
Expand Down
40 changes: 27 additions & 13 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ jobs:
config: '{"M4PRO_GPU16_24GB": 1}'
model: ${{ matrix.model }}
calling_job_name: 'single-m4-pro'
network_interface: 'Ethernet'
secrets: inherit

two-m4-pro-cluster:
Expand All @@ -28,30 +29,43 @@ jobs:
config: '{"M4PRO_GPU16_24GB": 2}'
model: ${{ matrix.model }}
calling_job_name: 'two-m4-pro-cluster'
network_interface: 'Ethernet'
secrets: inherit

# two-m4-pro-cluster-thunderbolt:
# strategy:
# matrix:
# model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b']
# uses: ./.github/workflows/bench_job.yml
# with:
# config: '{"M4PRO_GPU16_24GB": 2}'
# model: ${{ matrix.model }}
# calling_job_name: 'two-m4-pro-cluster-thunderbolt'
# network_interface: 'Thunderbolt'
# secrets: inherit

three-m4-pro-cluster:
strategy:
matrix:
model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b', 'llama-3.3-70b']
# fail-fast: false lets the remaining matrix jobs keep running even if one of them fails
fail-fast: false
uses: ./.github/workflows/bench_job.yml
with:
config: '{"M4PRO_GPU16_24GB": 3}'
model: ${{ matrix.model }}
calling_job_name: 'three-m4-pro-cluster'
network_interface: 'Ethernet'
secrets: inherit

# test-m3-single-node:
# strategy:
# matrix:
# model: ['llama-3.2-1b']
# # Optional: add fail-fast: false if you want all matrix jobs to continue even if one fails
# fail-fast: false
# uses: ./.github/workflows/bench_job.yml
# with:
# config: '{"M3MAX_GPU40_128GB": 1}'
# model: ${{ matrix.model }}
# calling_job_name: 'test-m3-cluster'
# secrets: inherit
test-m3-single-node:
strategy:
matrix:
model: ['llama-3.2-1b']
fail-fast: false
uses: ./.github/workflows/bench_job.yml
with:
config: '{"M3MAX_GPU40_128GB": 1}'
model: ${{ matrix.model }}
calling_job_name: 'test-m3-cluster'
network_interface: 'Ethernet'
secrets: inherit

0 comments on commit 023ddc2

Please sign in to comment.