From d1d49397e7f8d1ac472d763dae395b67fdda1ef8 Mon Sep 17 00:00:00 2001
From: Alberto Ferrer <albertof@barrahome.org>
Date: Sat, 4 Jan 2025 00:29:02 -0600
Subject: [PATCH] Update bnb.md with example for OpenAI (#11718)

---
 docs/source/quantization/bnb.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/source/quantization/bnb.md b/docs/source/quantization/bnb.md
index 8240eca1c7e03..f7f41726f3725 100644
--- a/docs/source/quantization/bnb.md
+++ b/docs/source/quantization/bnb.md
@@ -37,3 +37,10 @@ model_id = "huggyllama/llama-7b"
 llm = LLM(model=model_id, dtype=torch.bfloat16, trust_remote_code=True, \
 quantization="bitsandbytes", load_format="bitsandbytes")
 ```
+## OpenAI Compatible Server
+
+Append the following to your 4-bit model arguments when launching the server:
+
+```console
+--quantization bitsandbytes --load-format bitsandbytes
+```