Skip to content

Commit

Permalink
Fix GPTQ CI (#1878)
Browse files Browse the repository at this point in the history
* fix gptq tests

* simplify ci

* enable determinism

* fix

* add more expected outputs

* last one

* hopefully

* more expected outputs with each run

* new one

* add evaluation

* fix

* remove gptq extra

* style check
  • Loading branch information
IlyasMoutawwakil authored Jun 28, 2024
1 parent 291f535 commit 9c882fd
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 185 deletions.
56 changes: 23 additions & 33 deletions .github/workflows/check_code_quality.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
name: check_code_quality
name: Code Quality

on:
push:
branches: [ main ]
paths:
- "optimum/**.py"
- "tests/**.py"
- "examples/**.py"
branches: [main]

pull_request:
branches: [ main ]
paths:
- "optimum/**.py"
- "tests/**.py"
- "examples/**.py"
branches: [main]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
Expand All @@ -29,25 +21,23 @@ jobs:

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Create and start a virtual environment
run: |
python -m venv venv
source venv/bin/activate
- name: Install dependencies
run: |
source venv/bin/activate
pip install --upgrade pip
pip install .[quality]
- name: Check style with black
run: |
source venv/bin/activate
black --check .
- name: Check style with ruff
run: |
source venv/bin/activate
ruff .
- name: Checkout code
uses: actions/checkout@v4

- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
pip install --upgrade pip
pip install .[quality]
- name: Check style with black
run: |
black --check .
- name: Check style with ruff
run: |
ruff .
59 changes: 38 additions & 21 deletions .github/workflows/test_gptq.yml
Original file line number Diff line number Diff line change
@@ -1,29 +1,46 @@
name: GPTQ Quantization / Test GPU
name: GPTQ / Python - Test

on:
workflow_dispatch:
schedule:
- cron: 0 1 */3 * * # at 1am every 3 days
push:
branches: [main]
paths:
- tests/gptq/**
- optimum/gptq/**
- .github/workflows/test_gptq.yml
pull_request:
types: [opened, synchronize, reopened, labeled]
# uncomment to enable on PR merge on main branch:
#push:
# branches:
# - main
branches: [main]
paths:
- tests/gptq/**
- optimum/gptq/**
- .github/workflows/test_gptq.yml
schedule:
# every day at midnight
- cron: "0 0 * * *"

jobs:
do-the-job:
if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }}
name: Start self-hosted EC2 runner
test_gptq:
runs-on: [single-gpu, nvidia-gpu, t4, ci]
env:
AWS_REGION: us-east-1

steps:
- name: Checkout
uses: actions/checkout@v2
- name: Build image
run: |
docker build -f tests/gptq/Dockerfile_quantization_gpu -t gptq-gpu .
- name: Test with unittest within docker container
run: |
docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests gptq-gpu:latest
- name: Checkout code
uses: actions/checkout@v4

- name: Run tests
uses: addnab/docker-run-action@v3
with:
image: pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
# latest auto-gptq was built with pytorch 2.2 and cuda 12.1
options: |
--rm
--gpus all
--shm-size 16G
--env RUN_SLOW=1
--env HF_HOME=/mnt/cache/
--volume /mnt/cache/:/mnt/cache/
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
pip install auto-gptq
pip install -e .[tests]
pytest tests/gptq -s -vvvv --durations=0
41 changes: 41 additions & 0 deletions optimum/gptq/eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import torch
import torch.nn as nn
from datasets import load_dataset
from tqdm import tqdm


def evaluate_perplexity(model, tokenizer, seqlen=512):
    """Compute the perplexity of ``model`` on the WikiText-2 (raw) test split.

    The test split is joined with blank lines, tokenized in a single call and
    cut into consecutive, non-overlapping windows of ``seqlen`` tokens; any
    trailing tokens that do not fill a whole window are ignored. Each window
    is scored with next-token cross-entropy and the per-window losses are
    combined into one perplexity value.

    Note: the model is switched to evaluation mode via ``model.eval()`` and is
    not switched back afterwards.

    Args:
        model: Causal language model. It must expose ``.device`` and ``.eval()``
            and, when called on a tensor of token ids, return an object with a
            ``.logits`` attribute.
        tokenizer: Callable that accepts a string with ``return_tensors="pt"``
            and returns an object with an ``input_ids`` tensor
            (assumed to be of shape ``(1, n_tokens)`` -- TODO confirm).
        seqlen: Number of tokens per evaluation window. Defaults to 512, the
            value that was previously hard-coded.

    Returns:
        The perplexity as a Python ``float``.

    Raises:
        ValueError: If ``seqlen`` is smaller than 2, or if the tokenized
            corpus is too short to fill a single window.
    """
    if seqlen < 2:
        # A window needs at least one (input, target) pair after shifting.
        raise ValueError(f"seqlen must be at least 2, got {seqlen}")

    def _perplexity(nlls, n_samples, seqlen):
        return torch.exp(torch.stack(nlls).sum() / (n_samples * seqlen))

    # load and prepare dataset
    data = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
    data = tokenizer("\n\n".join(data["text"]), return_tensors="pt")
    data = data.input_ids.to(model.device)

    model = model.eval()
    n_samples = data.numel() // seqlen
    if n_samples == 0:
        # Without this guard torch.stack() would fail on an empty list with
        # an error message unrelated to the real cause.
        raise ValueError(
            f"Tokenized dataset has {data.numel()} tokens, fewer than seqlen={seqlen}"
        )

    # The criterion is stateless, so build it once instead of once per window.
    loss_fct = nn.CrossEntropyLoss()
    nlls = []

    with tqdm(range(n_samples), desc="Perplexity -") as progress_bar:
        for i in progress_bar:
            start_index = i * seqlen
            end_index = (i + 1) * seqlen
            # `data` already lives on the model device, so no extra transfer.
            batch = data[:, start_index:end_index]
            with torch.no_grad():
                logits = model(batch).logits

            # Position t predicts token t + 1: drop the last logit and the
            # first label so that both sides line up.
            shift_logits = logits[:, :-1, :].contiguous().float()
            shift_labels = batch[:, 1:]
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

            # Scale the mean loss of the window back up by the window length;
            # _perplexity divides by n_samples * seqlen again.
            neg_log_likelihood = loss.float() * seqlen
            nlls.append(neg_log_likelihood)

            curr_ppl = _perplexity(nlls, i + 1, seqlen)
            progress_bar.set_description(f"Perplexity {curr_ppl:.3f}")

    ppl = _perplexity(nlls, n_samples, seqlen)

    return ppl.item()
26 changes: 0 additions & 26 deletions tests/gptq/Dockerfile_quantization_gpu

This file was deleted.

Loading

0 comments on commit 9c882fd

Please sign in to comment.