add --generate option to upload best.json to s3 with best benchmark r… #112
name: Build and Test

on:
  push:
    branches: [ '*' ]
    tags: [ '*' ]
  pull_request:
    branches: [ '*' ]

env:
  PYTHON_VERSION: "3.12"
  TOKENIZERS_PARALLELISM: "false"
  PYTHONPATH: "."

jobs:
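  # check_line_count: compares this branch's line counts against the PR base branch
  # using extra/line_counter.py and uploads the resulting JSON reports as artifacts.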
  check_line_count:
    runs-on: depot-ubuntu-22.04-4
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install tabulate
      - name: Run line count check
        run: |
          if [[ -n "${{ github.event.pull_request }}" ]]; then
            git fetch origin ${{ github.base_ref }}
            git clone -b ${{ github.base_ref }} --single-branch \
              https://github.com/${{ github.repository }}.git base_branch
            python extra/line_counter.py base_branch .
          else
            python extra/line_counter.py .
          fi
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: line-count-results
          path: |
            line-count-snapshot.json
            line-count-diff.json
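
  # unit_test: installs the package in a virtualenv on macOS, smoke-tests a tinygrad
  # Tensor import, then runs the inference engine, tokenizer, and model helper tests.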
  unit_test:
    runs-on: depot-macos-latest
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      # - name: Cache python packages
      #   uses: actions/cache@v4
      #   with:
      #     path: ${{ env.Python3_ROOT_DIR }}/lib/python3.12/site-packages
      #     key: testing-packages-${{ hashFiles('**/setup.py') }}
      - name: Install dependencies
        run: |
          python -m venv env
          source env/bin/activate
          pip install --upgrade pip
          pip install llvmlite
          pip install .
      - name: Basic import test
        run: |
          source env/bin/activate
          python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
      - name: Run tests
        run: |
          source env/bin/activate
          METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=1 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine
          python3 ./test/test_tokenizers.py
          python3 ./test/test_model_helpers.py
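
  # discovery_integration_test: launches two exo nodes with mirrored listen/broadcast
  # ports and checks the logs to confirm each node reports the other as connected.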
  discovery_integration_test:
    runs-on: depot-ubuntu-22.04-4
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install dependencies
        run: |
          python -m venv env
          source env/bin/activate
          pip install --upgrade pip
          pip install .
      - name: Run discovery integration test
        run: |
          source env/bin/activate
          DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
          PID1=$!
          DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
          PID2=$!
          sleep 10
          kill $PID1 $PID2
          if grep -q "Peer statuses: {.*'node2': 'is_connected=True, health_check=True" output1.log && \
             ! grep -q "Failed to connect peers:" output1.log && \
             grep -q "Peer statuses: {.*'node1': 'is_connected=True, health_check=True" output2.log && \
             ! grep -q "Failed to connect peers:" output2.log; then
            echo "Test passed: Both instances discovered each other"
            exit 0
          else
            echo "Test failed: Devices did not discover each other"
            echo "Output of first instance:"
            cat output1.log
            echo "Output of second instance:"
            cat output2.log
            exit 1
          fi
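
  # chatgpt_api_tests: runs one two-node cluster per inference engine (mlx on macOS,
  # tinygrad and dummy on Linux), sends the same prompt to each node's
  # ChatGPT-compatible API, and requires both responses to contain expected_output.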
  chatgpt_api_tests:
    runs-on: ${{ (matrix.inference_engine == 'tinygrad' || matrix.inference_engine == 'dummy') && 'depot-ubuntu-22.04-4' || 'depot-macos-latest' }}
    strategy:
      matrix:
        inference_engine: [mlx, tinygrad, dummy]
        include:
          - inference_engine: mlx
            model_id: llama-3.2-1b
            prompt: "Keep responses concise. Who was the king of pop?"
            expected_output: "Michael Jackson"
          - inference_engine: tinygrad
            model_id: llama-3.2-1b
            prompt: "Keep responses concise. Who was the king of pop?"
            expected_output: "Michael Jackson"
          - inference_engine: dummy
            model_id: dummy
            prompt: "Dummy prompt."
            expected_output: "dummy"
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install dependencies
        run: |
          python -m venv env
          source env/bin/activate
          pip install --upgrade pip
          pip install .
          if [ "${{ matrix.inference_engine }}" = "tinygrad" ]; then
            pip install llvmlite
          fi
      - name: Run ChatGPT API test
        env:
          TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }}
          SUPPORT_BF16: '0'
          CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '0' }}
          METAL_DEBUG_ERROR_MODE: '0'
          METAL_DEVICE_WRAPPER_TYPE: '1'
          METAL_XCODE: '1'
        run: |
          source env/bin/activate
          # Start first instance
          HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine ${{ matrix.inference_engine }} \
            --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 \
            --chatgpt-api-response-timeout 900 --disable-tui > output1.log &
          PID1=$!
          tail -f output1.log &
          TAIL1=$!
          # Start second instance
          HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine ${{ matrix.inference_engine }} \
            --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 \
            --chatgpt-api-response-timeout 900 --disable-tui > output2.log &
          PID2=$!
          tail -f output2.log &
          TAIL2=$!
          # Remember to kill the tail processes at the end
          trap 'kill $TAIL1 $TAIL2' EXIT
          # Wait for discovery and verify peer connections
          sleep 10
          if ! grep -q "Peer statuses: {.*'node2': 'is_connected=True, health_check=True" output1.log || \
             grep -q "Failed to connect peers:" output1.log || \
             ! grep -q "Peer statuses: {.*'node1': 'is_connected=True, health_check=True" output2.log || \
             grep -q "Failed to connect peers:" output2.log; then
            echo "Test failed: Nodes did not discover each other properly"
            echo "Output of first instance:"
            cat output1.log
            echo "Output of second instance:"
            cat output2.log
            exit 1
          fi
          echo "Peer discovery successful"
          # Function to check if processes are still running
          check_processes() {
            if ! kill -0 $PID1 2>/dev/null; then
              echo "First instance (PID $PID1) died unexpectedly. Log output:"
              cat output1.log
              exit 1
            fi
            if ! kill -0 $PID2 2>/dev/null; then
              echo "Second instance (PID $PID2) died unexpectedly. Log output:"
              cat output2.log
              exit 1
            fi
          }
          # Check processes before proceeding
          check_processes
          echo "Sending request to first instance..."
          response_1=$(curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "${{ matrix.model_id }}",
              "messages": [{"role": "user", "content": "${{ matrix.prompt }}"}],
              "temperature": 0.7
            }')
          echo "Response 1: $response_1"
          # Check processes after first response
          check_processes
          echo "Sending request to second instance..."
          response_2=$(curl -s http://localhost:8001/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "${{ matrix.model_id }}",
              "messages": [{"role": "user", "content": "${{ matrix.prompt }}"}],
              "temperature": 0.7
            }')
          echo "Response 2: $response_2"
          # Check processes after second response
          check_processes
          # Stop both instances
          kill $PID1 $PID2
          echo ""
          # Extract content using jq and check if it contains expected output
          content1=$(echo "$response_1" | jq -r '.choices[0].message.content')
          content2=$(echo "$response_2" | jq -r '.choices[0].message.content')
          if [[ "$content1" != *"${{ matrix.expected_output }}"* ]] || [[ "$content2" != *"${{ matrix.expected_output }}"* ]]; then
            echo "Test failed: Response does not match '${{ matrix.expected_output }}'"
            echo "Response 1 content: $content1"
            echo ""
            echo "Response 2 content: $content2"
            echo "Output of first instance:"
            cat output1.log
            echo "Output of second instance:"
            cat output2.log
            exit 1
          else
            echo "Test passed: Response from both nodes matches '${{ matrix.expected_output }}'"
          fi
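
  # measure_pip_sizes: installs the package, records the on-disk size of the installed
  # dependencies via extra/pipsize.py, and uploads the resulting JSON report.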
  measure_pip_sizes:
    runs-on: depot-macos-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install dependencies and measure sizes
        run: |
          python -m venv env
          source env/bin/activate
          pip install --upgrade pip
          pip install .
          python ./extra/pipsize.py --json ./pipsize.json
      - name: Upload pip sizes artifact
        uses: actions/upload-artifact@v4
        with:
          name: pip-sizes
          path: ./pipsize.json
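
# Rough local reproduction of the chatgpt_api_tests job. This is a sketch, not part
# of CI; it assumes `exo` and `jq` are installed locally and that the llama-3.2-1b
# model can be fetched. Commands mirror the flags used in the job above:
#
#   exo --inference-engine tinygrad --node-id "node1" --listen-port 5678 --broadcast-port 5679 \
#     --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 --disable-tui > output1.log &
#   exo --inference-engine tinygrad --node-id "node2" --listen-port 5679 --broadcast-port 5678 \
#     --chatgpt-api-port 8001 --chatgpt-api-response-timeout 900 --disable-tui > output2.log &
#   sleep 10
#   curl -s http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" \
#     -d '{"model": "llama-3.2-1b", "messages": [{"role": "user", "content": "Who was the king of pop?"}], "temperature": 0.7}' \
#     | jq -r '.choices[0].message.content'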