Commit 2e6d25b

add
echarlaix committed Mar 18, 2024
1 parent eaf327c commit 2e6d25b
Showing 1 changed file with 15 additions and 15 deletions.
30 changes: 15 additions & 15 deletions notebooks/openvino/quantized_generation_demo.ipynb
@@ -75,16 +75,16 @@
"metadata": {},
"outputs": [],
"source": [
"model_name = 'microsoft/phi-2'\n",
"save_name = model_name.split(\"/\")[-1] + '_openvino'\n",
"precision = 'f32'\n",
"model_name = \"microsoft/phi-2\"\n",
"save_name = model_name.split(\"/\")[-1] + \"_openvino\"\n",
"precision = \"f32\"\n",
"quantization_config = OVWeightQuantizationConfig(\n",
" bits=4,\n",
" sym=False,\n",
" group_size=128,\n",
" ratio=0.8,\n",
")\n",
"device = 'gpu'"
"device = \"gpu\""
]
},
{
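For context, a minimal sketch of how the 4-bit weight-quantization config from this hunk is consumed (assuming optimum-intel with OpenVINO extras installed; model name and parameters as in the cell above, this is not part of the commit):

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# ratio=0.8 puts ~80% of the weights in 4-bit and keeps the rest in 8-bit;
# group_size=128 shares one quantization scale per 128 consecutive weights.
quantization_config = OVWeightQuantizationConfig(bits=4, sym=False, group_size=128, ratio=0.8)

model = OVModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    export=True,                            # convert the PyTorch checkpoint to OpenVINO IR
    quantization_config=quantization_config,
    compile=False,                          # defer compilation until device/config are final
)
```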
@@ -114,14 +114,14 @@
"source": [
"# Load kwargs\n",
"load_kwargs = {\n",
" 'device': device,\n",
" 'ov_config': {\n",
" \"device\": device,\n",
" \"ov_config\": {\n",
" \"PERFORMANCE_HINT\": \"LATENCY\",\n",
" \"INFERENCE_PRECISION_HINT\": precision,\n",
" \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n",
" },\n",
" 'compile': False,\n",
" 'quantization_config': quantization_config\n",
" \"compile\": False,\n",
" \"quantization_config\": quantization_config\n",
"}\n",
"\n",
"# Check whether the model was already exported\n",
@@ -143,7 +143,7 @@
"\n",
"# TODO Optional: export to huggingface/hub\n",
"\n",
"model_size = os.stat(os.path.join(save_name, 'openvino_model.bin')).st_size / 1024 ** 3\n",
"model_size = os.stat(os.path.join(save_name, \"openvino_model.bin\")).st_size / 1024 ** 3\n",
"print(f'Model size in FP32: ~5.4GB, current model size in 4bit: {model_size:.2f}GB')"
]
},
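As a sanity check on the printed size, a back-of-the-envelope estimate for the mixed 4/8-bit scheme above. The phi-2 parameter count is an assumption, and the real `openvino_model.bin` also stores per-group scales and zero-points, so the file on disk will be somewhat larger:

```python
n_params = 2.7e9  # assumed phi-2 parameter count
ratio = 0.8       # fraction of weights stored in 4-bit (0.5 byte); the rest in 8-bit (1 byte)
est_bytes = ratio * n_params * 0.5 + (1 - ratio) * n_params * 1.0
print(f"expected weight payload: ~{est_bytes / 1024**3:.1f} GB")  # roughly 1.5 GB before metadata
```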
@@ -312,12 +312,12 @@
" for idx, (user_msg, model_msg) in enumerate(history):\n",
" # skip the last assistant message if its empty, the tokenizer will do the formating\n",
" if idx == len(history) - 1 and not model_msg:\n",
" messages.append({'role': 'User', 'content': user_msg})\n",
" messages.append({\"role\": \"User\", \"content\": user_msg})\n",
" break\n",
" if user_msg:\n",
" messages.append({'role': 'User', 'content': user_msg})\n",
" messages.append({\"role\": \"User\", \"content\": user_msg})\n",
" if model_msg:\n",
" messages.append({'role': 'Assistant', 'content': model_msg})\n",
" messages.append({\"role\": \"Assistant\", \"content\": model_msg})\n",
" input_token = tokenizer.apply_chat_template(\n",
" messages,\n",
" add_generation_prompt=True,\n",
@@ -356,7 +356,7 @@
"\n",
" prompt_char = '▌'\n",
" history[-1][1] = prompt_char\n",
" yield (history, 'Status: Generating...')\n",
" yield (history, \"Status: Generating...\")\n",
" \n",
" streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
"\n",
@@ -394,7 +394,7 @@
" break\n",
" elif is_partial_stop(partial_text, stop_str):\n",
" continue\n",
" yield (history, 'Status: Generating...')\n",
" yield (history, \"Status: Generating...\")\n",
" history[-1][1] = partial_text\n",
" generation_time = time.perf_counter() - start\n",
" yield (history, f'Generation time: {generation_time:.2f} sec')"
@@ -519,7 +519,7 @@
" queue=True\n",
" )\n",
" \n",
" clear.click(fn=lambda: (None, 'Status: Idle'), inputs=None, outputs=[chatbot, status], queue=False)"
" clear.click(fn=lambda: (None, \"Status: Idle\"), inputs=None, outputs=[chatbot, status], queue=False)"
]
},
{
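The clear handler resets both components in one step: returning `None` empties the `Chatbot` history and the string resets the status box, with `queue=False` running the reset immediately. A minimal reconstruction of the surrounding Gradio wiring (component names follow the diff; the layout itself is an assumption):

```python
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    status = gr.Textbox(value="Status: Idle", label="Status")
    clear = gr.Button("Clear")
    # None clears the chat history; the tuple order matches outputs=[chatbot, status].
    clear.click(
        fn=lambda: (None, "Status: Idle"),
        inputs=None,
        outputs=[chatbot, status],
        queue=False,
    )
```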
