Rename notebook #614

Merged
merged 4 commits on Mar 18, 2024
notebooks/openvino/README.md (1 addition, 1 deletion)
@@ -12,5 +12,5 @@ The notebooks have been tested with Python 3.8 and 3.10 on Ubuntu Linux.
|:----------|:-------------|:-------------|------:|
| [How to run inference with the OpenVINO](https://github.com/huggingface/optimum-intel/blob/main/notebooks/openvino/optimum_openvino_inference.ipynb) | Explains how to export your model to OpenVINO and to run inference with OpenVINO Runtime on various tasks| [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/openvino/optimum_openvino_inference.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/openvino/optimum_openvino_inference.ipynb)|
| [How to quantize a question answering model with OpenVINO NNCF](https://github.com/huggingface/optimum-intel/blob/main/notebooks/openvino/question_answering_quantization.ipynb) | Show how to apply post-training quantization on a question answering model using [NNCF](https://github.com/openvinotoolkit/nncf) and to accelerate inference with OpenVINO| [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/openvino/question_answering_quantization.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/openvino/question_answering_quantization.ipynb)|
-| [Compare outputs of a quantized Stable Diffusion model with its full-precision counterpart](https://github.com/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_quantization.ipynb) | Show how to load and compare outputs from two Stable Diffusion models with different precision| [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_quantization.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_quantization.ipynb)|
+| [Compare outputs of a quantized Stable Diffusion model with its full-precision counterpart](https://github.com/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_optimization.ipynb) | Show how to load and compare outputs from two Stable Diffusion models with different precision| [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_optimization.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/openvino/stable_diffusion_optimization.ipynb)|

@@ -75,16 +75,16 @@
"metadata": {},
"outputs": [],
"source": [
"model_name = 'microsoft/phi-2'\n",
"save_name = './phi-2-woq4'\n",
"precision = 'f32'\n",
"model_name = \"microsoft/phi-2\"\n",
"save_name = model_name.split(\"/\")[-1] + \"_openvino\"\n",
"precision = \"f32\"\n",
"quantization_config = OVWeightQuantizationConfig(\n",
" bits=4,\n",
" sym=False,\n",
" group_size=128,\n",
" ratio=0.8,\n",
")\n",
"device = 'gpu'"
"device = \"gpu\""
]
},
{
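The hunk above moves the config cell to double quotes and derives `save_name` from the model id instead of a hard-coded path. For readers following along, here is a hedged sketch of how such a 4-bit weight-quantization config is typically consumed by optimum-intel; the `from_pretrained`/`save_pretrained` calls are assumptions based on the library's documented API and are not part of this diff.

```python
# Sketch only: how the cell's config is typically used (assumes optimum-intel is installed).
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

model_name = "microsoft/phi-2"
save_name = model_name.split("/")[-1] + "_openvino"

# 4-bit asymmetric weight quantization; ratio=0.8 keeps ~80% of weights
# in 4-bit and the rest in 8-bit for better accuracy.
quantization_config = OVWeightQuantizationConfig(
    bits=4,
    sym=False,
    group_size=128,
    ratio=0.8,
)

# export=True converts the PyTorch checkpoint to OpenVINO IR on the fly.
model = OVModelForCausalLM.from_pretrained(
    model_name, export=True, quantization_config=quantization_config
)
model.save_pretrained(save_name)
```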
@@ -114,14 +114,14 @@
"source": [
"# Load kwargs\n",
"load_kwargs = {\n",
" 'device': device,\n",
" 'ov_config': {\n",
" \"device\": device,\n",
" \"ov_config\": {\n",
" \"PERFORMANCE_HINT\": \"LATENCY\",\n",
" \"INFERENCE_PRECISION_HINT\": precision,\n",
" \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n",
" },\n",
" 'compile': False,\n",
" 'quantization_config': quantization_config\n",
" \"compile\": False,\n",
" \"quantization_config\": quantization_config\n",
"}\n",
"\n",
"# Check whether the model was already exported\n",
@@ -143,7 +143,7 @@
"\n",
"# TODO Optional: export to huggingface/hub\n",
"\n",
"model_size = os.stat(os.path.join(save_name, 'openvino_model.bin')).st_size / 1024 ** 3\n",
"model_size = os.stat(os.path.join(save_name, \"openvino_model.bin\")).st_size / 1024 ** 3\n",
"print(f'Model size in FP32: ~5.4GB, current model size in 4bit: {model_size:.2f}GB')"
]
},
@@ -312,12 +312,12 @@
" for idx, (user_msg, model_msg) in enumerate(history):\n",
" # skip the last assistant message if its empty, the tokenizer will do the formating\n",
" if idx == len(history) - 1 and not model_msg:\n",
" messages.append({'role': 'User', 'content': user_msg})\n",
" messages.append({\"role\": \"User\", \"content\": user_msg})\n",
" break\n",
" if user_msg:\n",
" messages.append({'role': 'User', 'content': user_msg})\n",
" messages.append({\"role\": \"User\", \"content\": user_msg})\n",
" if model_msg:\n",
" messages.append({'role': 'Assistant', 'content': model_msg})\n",
" messages.append({\"role\": \"Assistant\", \"content\": model_msg})\n",
" input_token = tokenizer.apply_chat_template(\n",
" messages,\n",
" add_generation_prompt=True,\n",
@@ -356,7 +356,7 @@
"\n",
" prompt_char = '▌'\n",
" history[-1][1] = prompt_char\n",
" yield (history, 'Status: Generating...')\n",
" yield (history, \"Status: Generating...\")\n",
" \n",
" streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
"\n",
@@ -394,7 +394,7 @@
" break\n",
" elif is_partial_stop(partial_text, stop_str):\n",
" continue\n",
" yield (history, 'Status: Generating...')\n",
" yield (history, \"Status: Generating...\")\n",
" history[-1][1] = partial_text\n",
" generation_time = time.perf_counter() - start\n",
" yield (history, f'Generation time: {generation_time:.2f} sec')"
@@ -519,7 +519,7 @@
" queue=True\n",
" )\n",
" \n",
" clear.click(fn=lambda: (None, 'Status: Idle'), inputs=None, outputs=[chatbot, status], queue=False)"
" clear.click(fn=lambda: (None, \"Status: Idle\"), inputs=None, outputs=[chatbot, status], queue=False)"
]
},
{
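The `clear.click` handler in the hunk above resets the two output components. A hedged sketch of the surrounding Gradio wiring, with component names following the notebook but the layout itself illustrative:

```python
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    status = gr.Textbox(value="Status: Idle", label="Status")
    clear = gr.Button("Clear")

    # Returning (None, "Status: Idle") empties the chatbot and resets the label;
    # queue=False makes the reset immediate even while generation is queued.
    clear.click(fn=lambda: (None, "Status: Idle"), inputs=None, outputs=[chatbot, status], queue=False)

demo.launch()
```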
@@ -575,7 +575,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.9.18"
}
},
"nbformat": 4,