From d1ee29fc7764ca478bdbcb5651163d4ed76eee0d Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 18 Mar 2024 16:10:36 +0100
Subject: [PATCH 1/4] rename

---
 .../{phi-2_on_mtl.ipynb => quantized_generation_demo.ipynb} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename notebooks/openvino/{phi-2_on_mtl.ipynb => quantized_generation_demo.ipynb} (100%)

diff --git a/notebooks/openvino/phi-2_on_mtl.ipynb b/notebooks/openvino/quantized_generation_demo.ipynb
similarity index 100%
rename from notebooks/openvino/phi-2_on_mtl.ipynb
rename to notebooks/openvino/quantized_generation_demo.ipynb

From eaf327cbd4a55f81b568d7d4e9544c15c2412439 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 18 Mar 2024 16:12:43 +0100
Subject: [PATCH 2/4] update saving directory

---
 notebooks/openvino/quantized_generation_demo.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/notebooks/openvino/quantized_generation_demo.ipynb b/notebooks/openvino/quantized_generation_demo.ipynb
index 88f0387f05..ac787e67cb 100644
--- a/notebooks/openvino/quantized_generation_demo.ipynb
+++ b/notebooks/openvino/quantized_generation_demo.ipynb
@@ -76,7 +76,7 @@
    "outputs": [],
    "source": [
     "model_name = 'microsoft/phi-2'\n",
-    "save_name = './phi-2-woq4'\n",
+    "save_name = model_name.split(\"/\")[-1] + '_openvino'\n",
     "precision = 'f32'\n",
     "quantization_config = OVWeightQuantizationConfig(\n",
     "    bits=4,\n",
@@ -575,7 +575,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.9.18"
   }
  },
  "nbformat": 4,

From 2e6d25b98b097caf103f7c6362382942f1279454 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 18 Mar 2024 16:18:24 +0100
Subject: [PATCH 3/4] add

---
 .../openvino/quantized_generation_demo.ipynb | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/notebooks/openvino/quantized_generation_demo.ipynb b/notebooks/openvino/quantized_generation_demo.ipynb
index ac787e67cb..582b463346 100644
--- a/notebooks/openvino/quantized_generation_demo.ipynb
+++ b/notebooks/openvino/quantized_generation_demo.ipynb
@@ -75,16 +75,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model_name = 'microsoft/phi-2'\n",
-    "save_name = model_name.split(\"/\")[-1] + '_openvino'\n",
-    "precision = 'f32'\n",
+    "model_name = \"microsoft/phi-2\"\n",
+    "save_name = model_name.split(\"/\")[-1] + \"_openvino\"\n",
+    "precision = \"f32\"\n",
     "quantization_config = OVWeightQuantizationConfig(\n",
     "    bits=4,\n",
     "    sym=False,\n",
     "    group_size=128,\n",
     "    ratio=0.8,\n",
     ")\n",
-    "device = 'gpu'"
+    "device = \"gpu\""
    ]
   },
   {
@@ -114,14 +114,14 @@
    "source": [
     "# Load kwargs\n",
     "load_kwargs = {\n",
-    "    'device': device,\n",
-    "    'ov_config': {\n",
+    "    \"device\": device,\n",
+    "    \"ov_config\": {\n",
     "        \"PERFORMANCE_HINT\": \"LATENCY\",\n",
     "        \"INFERENCE_PRECISION_HINT\": precision,\n",
     "        \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n",
     "    },\n",
-    "    'compile': False,\n",
-    "    'quantization_config': quantization_config\n",
+    "    \"compile\": False,\n",
+    "    \"quantization_config\": quantization_config\n",
     "}\n",
     "\n",
     "# Check whether the model was already exported\n",
@@ -143,7 +143,7 @@
     "\n",
     "# TODO Optional: export to huggingface/hub\n",
     "\n",
-    "model_size = os.stat(os.path.join(save_name, 'openvino_model.bin')).st_size / 1024 ** 3\n",
+    "model_size = os.stat(os.path.join(save_name, \"openvino_model.bin\")).st_size / 1024 ** 3\n",
     "print(f'Model size in FP32: ~5.4GB, current model size in 4bit: {model_size:.2f}GB')"
    ]
   },
@@ -312,12 +312,12 @@
     "    for idx, (user_msg, model_msg) in enumerate(history):\n",
     "        # skip the last assistant message if its empty, the tokenizer will do the formating\n",
     "        if idx == len(history) - 1 and not model_msg:\n",
-    "            messages.append({'role': 'User', 'content': user_msg})\n",
+    "            messages.append({\"role\": \"User\", \"content\": user_msg})\n",
     "            break\n",
     "        if user_msg:\n",
-    "            messages.append({'role': 'User', 'content': user_msg})\n",
+    "            messages.append({\"role\": \"User\", \"content\": user_msg})\n",
     "        if model_msg:\n",
-    "            messages.append({'role': 'Assistant', 'content': model_msg})\n",
+    "            messages.append({\"role\": \"Assistant\", \"content\": model_msg})\n",
     "    input_token = tokenizer.apply_chat_template(\n",
     "        messages,\n",
     "        add_generation_prompt=True,\n",
@@ -356,7 +356,7 @@
     "\n",
     "    prompt_char = '▌'\n",
     "    history[-1][1] = prompt_char\n",
-    "    yield (history, 'Status: Generating...')\n",
+    "    yield (history, \"Status: Generating...\")\n",
     "    \n",
     "    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
     "\n",
@@ -394,7 +394,7 @@
     "            break\n",
     "        elif is_partial_stop(partial_text, stop_str):\n",
     "            continue\n",
-    "        yield (history, 'Status: Generating...')\n",
+    "        yield (history, \"Status: Generating...\")\n",
     "    history[-1][1] = partial_text\n",
     "    generation_time = time.perf_counter() - start\n",
     "    yield (history, f'Generation time: {generation_time:.2f} sec')"
@@ -519,7 +519,7 @@
     "        queue=True\n",
     "    )\n",
     "    \n",
-    "    clear.click(fn=lambda: (None, 'Status: Idle'), inputs=None, outputs=[chatbot, status], queue=False)"
+    "    clear.click(fn=lambda: (None, \"Status: Idle\"), inputs=None, outputs=[chatbot, status], queue=False)"
    ]
   },
   {

From 11f982155dace8272d4fb4995f7e71cc73ca60d4 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 18 Mar 2024 16:34:18 +0100
Subject: [PATCH 4/4] fix notebook link

---
 notebooks/openvino/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebooks/openvino/README.md b/notebooks/openvino/README.md
index 611228dc35..d19fbb9288 100644
--- a/notebooks/openvino/README.md
+++ b/notebooks/openvino/README.md
@@ -12,5 +12,5 @@ The notebooks have been tested with Python 3.8 and 3.10 on Ubuntu Linux.
 |:----------|:-------------|:-------------|------:|
 | [How to run inference with the OpenVINO](https://github.com/huggingface/optimum-intel/blob/main/notebooks/openvino/optimum_openvino_inference.ipynb) | Explains how to export your model to OpenVINO and to run inference with OpenVINO Runtime on various tasks| [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/openvino/optimum_openvino_inference.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/openvino/optimum_openvino_inference.ipynb)|
 | [How to quantize a question answering model with OpenVINO NNCF](https://github.com/huggingface/optimum-intel/blob/main/notebooks/openvino/question_answering_quantization.ipynb) | Show how to apply post-training quantization on a question answering model using [NNCF](https://github.com/openvinotoolkit/nncf) and to accelerate inference with OpenVINO| [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/openvino/question_answering_quantization.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/openvino/question_answering_quantization.ipynb)|
-| [Compare outputs of a quantized Stable Diffusion model with its full-precision counterpart](https://github.com/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_quantization.ipynb) | Show how to load and compare outputs from two Stable Diffusion models with different precision| [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_quantization.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_quantization.ipynb)|
+| [Compare outputs of a quantized Stable Diffusion model with its full-precision counterpart](https://github.com/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_optimization.ipynb) | Show how to load and compare outputs from two Stable Diffusion models with different precision| [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_optimization.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_optimization.ipynb)|
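---

Note: the quantized export/load flow that patches 2 and 3 settle on can be collected into a standalone script. The following is a minimal sketch assembled from the hunks above, not part of the PR itself: the names (`model_name`, `save_name`, `load_kwargs`, `OVWeightQuantizationConfig`) all come from the notebook diff, while the export-on-first-run branch (`export=True` plus `save_pretrained`) is an assumption about the notebook cells this diff does not show.

```python
import os

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

model_name = "microsoft/phi-2"
save_name = model_name.split("/")[-1] + "_openvino"
precision = "f32"

# 4-bit asymmetric weight quantization in groups of 128 values;
# ratio=0.8 keeps roughly 20% of the weights at higher (8-bit) precision.
quantization_config = OVWeightQuantizationConfig(
    bits=4,
    sym=False,
    group_size=128,
    ratio=0.8,
)

load_kwargs = {
    "device": "gpu",
    "ov_config": {
        "PERFORMANCE_HINT": "LATENCY",
        "INFERENCE_PRECISION_HINT": precision,
        "CACHE_DIR": os.path.join(save_name, "model_cache"),  # OpenVINO will use this directory as cache
    },
    "compile": False,
    "quantization_config": quantization_config,
}

# Assumption: export (and quantize) on the first run, then reload the
# saved OpenVINO model on subsequent runs instead of re-exporting.
if not os.path.exists(save_name):
    model = OVModelForCausalLM.from_pretrained(model_name, export=True, **load_kwargs)
    model.save_pretrained(save_name)
else:
    model = OVModelForCausalLM.from_pretrained(save_name, **load_kwargs)

model_size = os.stat(os.path.join(save_name, "openvino_model.bin")).st_size / 1024 ** 3
print(f"Current model size in 4bit: {model_size:.2f}GB")
```

Compilation is deferred (`compile=False`) so the device, performance hints, and cache directory are all in place before the model is compiled once and used for generation.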