Skip to content

Commit

Permalink
Fix IpexLLM docs/help text ("embedding model" → "LLM"), add --save-lowbit-dir argument, and mark GPU-only quantization types
Browse files Browse the repository at this point in the history
  • Loading branch information
ivy-lv11 committed May 23, 2024
1 parent 576d7e9 commit a1e34d7
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docs/docs/examples/llm/ipex_llm_gpu.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
"\n",
"## `IpexLLM`\n",
"\n",
"Setting `device_map=\"xpu\"` when initializing `IpexLLM` will put the embedding model on Intel GPU and benefit from IPEX-LLM optimizations:\n",
"Setting `device_map=\"xpu\"` when initializing `IpexLLM` will put the LLM on Intel GPU and benefit from IPEX-LLM optimizations. Use the proper prompt format for zephyr-7b-alpha following the [model card](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha).\n",
"\n",
"```python\n",
"# Transform a string into zephyr-specific input\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def messages_to_prompt(messages):
type=str,
default="cpu",
choices=["cpu", "xpu"],
help="The device (Intel CPU or Intel GPU) the embedding model runs on",
help="The device (Intel CPU or Intel GPU) the LLM model runs on",
)
parser.add_argument(
"--query",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def messages_to_prompt(messages):
type=str,
default="cpu",
choices=["cpu", "xpu"],
help="The device (Intel CPU or Intel GPU) the embedding model runs on",
help="The device (Intel CPU or Intel GPU) the LLM model runs on",
)
parser.add_argument(
"--query",
Expand All @@ -51,11 +51,19 @@ def messages_to_prompt(messages):
default="What is IPEX-LLM?",
help="The sentence you prefer for query the LLM",
)
parser.add_argument(
"--save-lowbit-dir",
"-s",
type=str,
default="./lowbit",
help="The directory to save the low bit model",
)

args = parser.parse_args()
model_name = args.model_name
device = args.device
query = args.query
saved_lowbit_model_path = args.save_lowbit_dir

llm = IpexLLM.from_model_id(
model_name=model_name,
Expand All @@ -68,8 +76,6 @@ def messages_to_prompt(messages):
device_map=device,
)

saved_lowbit_model_path = "./zephyr-7b-alpha-low-bit" # path to save low-bit model

llm._model.save_low_bit(saved_lowbit_model_path)
del llm

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ def completion_to_prompt(completion):
"sym_int5",
"asym_int5",
"sym_int8",
"fp4",
"fp8",
"fp16",
"bf16",
"fp8_e4m3",
"fp8_e5m2",
"nf3",
"nf4",
"fp4", # only available on GPU
"fp8", # only available on GPU
"fp16", # only available on GPU
"bf16", # only available on GPU
"fp8_e4m3", # only available on GPU
"fp8_e5m2", # only available on GPU
"nf3", # only available on GPU
"nf4", # only available on GPU
],
help="The quantization type the model will convert to.",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def __init__(
)
if device_map not in ["cpu", "xpu"]:
raise ValueError(
"IpexLLMEmbedding currently only supports device to be 'cpu' or 'xpu', "
"IpexLLM currently only supports device to be 'cpu' or 'xpu', "
f"but you have: {device_map}."
)
if "xpu" in device_map:
Expand Down Expand Up @@ -292,7 +292,7 @@ def from_model_id(
load_in_low_bit: Optional[str] = None,
model: Optional[Any] = None,
tokenizer: Optional[Any] = None,
device_map: Optional[str] = "cpu",
device_map: Literal["cpu", "xpu"] = "cpu",
stopping_ids: Optional[List[int]] = None,
tokenizer_kwargs: Optional[dict] = None,
tokenizer_outputs_to_remove: Optional[list] = None,
Expand Down Expand Up @@ -338,7 +338,7 @@ def from_model_id_low_bit(
model_name: str = DEFAULT_HUGGINGFACE_MODEL,
model: Optional[Any] = None,
tokenizer: Optional[Any] = None,
device_map: Optional[str] = "cpu",
device_map: Literal["cpu", "xpu"] = "cpu",
stopping_ids: Optional[List[int]] = None,
tokenizer_kwargs: Optional[dict] = None,
tokenizer_outputs_to_remove: Optional[list] = None,
Expand Down

0 comments on commit a1e34d7

Please sign in to comment.