Skip to content

Commit

Permalink
Add support for GPTQ Marlin
Browse files Browse the repository at this point in the history
  • Loading branch information
danieldk committed Jun 11, 2024
1 parent 85dfc39 commit 6b68dbd
Show file tree
Hide file tree
Showing 39 changed files with 4,596 additions and 98 deletions.
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq
# Build marlin kernels
FROM kernel-builder as marlin-kernels-builder
WORKDIR /usr/src
COPY server/Makefile-marlin Makefile
COPY server/marlin/ .
# Compile the marlin CUDA kernels for the listed architectures
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-marlin
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build

# Build Transformers CUDA kernels
FROM kernel-builder as custom-kernels-builder
Expand Down Expand Up @@ -213,7 +213,7 @@ COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86
# Copy build artifacts from eetq kernels builder
COPY --from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Copy build artifacts from marlin kernels builder
COPY --from=marlin-kernels-builder /usr/src/marlin/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
COPY --from=marlin-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages

# Copy build artifacts from vllm builder
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2323,
"logprob": null,
"text": "Test"
},
{
"id": 1715,
"logprob": -11.75,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 311,
"logprob": -2.8261719,
"special": false,
"text": " to"
},
{
"id": 279,
"logprob": -2.0507812,
"special": false,
"text": " the"
},
{
"id": 3622,
"logprob": -2.2832031,
"special": false,
"text": " server"
},
{
"id": 13,
"logprob": -1.7314453,
"special": false,
"text": "."
},
{
"id": 578,
"logprob": -1.5800781,
"special": false,
"text": " The"
},
{
"id": 1715,
"logprob": -1.3330078,
"special": false,
"text": " request"
},
{
"id": 374,
"logprob": -1.4277344,
"special": false,
"text": " is"
},
{
"id": 3288,
"logprob": -1.8115234,
"special": false,
"text": " sent"
},
{
"id": 311,
"logprob": -1.1923828,
"special": false,
"text": " to"
},
{
"id": 279,
"logprob": -0.06713867,
"special": false,
"text": " the"
}
],
"top_tokens": null
},
"generated_text": " to the server. The request is sent to the"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2323,
"logprob": null,
"text": "Test"
},
{
"id": 1715,
"logprob": -11.75,
"text": " request"
}
],
"seed": 0,
"tokens": [
{
"id": 13,
"logprob": -2.2363281,
"special": false,
"text": "."
},
{
"id": 578,
"logprob": -0.10021973,
"special": false,
"text": " The"
},
{
"id": 3016,
"logprob": -1.6083984,
"special": false,
"text": " client"
},
{
"id": 649,
"logprob": -0.95751953,
"special": false,
"text": " can"
},
{
"id": 1005,
"logprob": -0.6298828,
"special": false,
"text": " use"
},
{
"id": 279,
"logprob": 0.0,
"special": false,
"text": " the"
},
{
"id": 1595,
"logprob": -0.17492676,
"special": false,
"text": " `"
},
{
"id": 2079,
"logprob": -0.27368164,
"special": false,
"text": "request"
},
{
"id": 63,
"logprob": 0.0,
"special": false,
"text": "`"
},
{
"id": 1665,
"logprob": 0.0,
"special": false,
"text": " object"
}
],
"top_tokens": null
},
"generated_text": "Test request. The client can use the `request` object"
}
Loading

0 comments on commit 6b68dbd

Please sign in to comment.