From d1d49397e7f8d1ac472d763dae395b67fdda1ef8 Mon Sep 17 00:00:00 2001 From: Alberto Ferrer Date: Sat, 4 Jan 2025 00:29:02 -0600 Subject: [PATCH] Update bnb.md with example for OpenAI (#11718) --- docs/source/quantization/bnb.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/quantization/bnb.md b/docs/source/quantization/bnb.md index 8240eca1c7e03..f7f41726f3725 100644 --- a/docs/source/quantization/bnb.md +++ b/docs/source/quantization/bnb.md @@ -37,3 +37,10 @@ model_id = "huggyllama/llama-7b" llm = LLM(model=model_id, dtype=torch.bfloat16, trust_remote_code=True, \ quantization="bitsandbytes", load_format="bitsandbytes") ``` +## OpenAI Compatible Server + +Append the following to your 4-bit model arguments: + +``` +--quantization bitsandbytes --load-format bitsandbytes +```