From 9ba929d3b3bd537fa3ddc836a559bcb3bd73ffc6 Mon Sep 17 00:00:00 2001
From: wangmengbing
Date: Tue, 26 Mar 2024 02:10:03 +0000
Subject: [PATCH 1/2] Verified baichuan2-7b-chat with GenAI text_generation,
 added it to GitHub workflow and README

---
 .github/workflows/causal_lm_cpp.yml     | 43 ++++++++++++++++++++++++-
 text_generation/causal_lm/cpp/README.md | 12 ++++---
 2 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 5cfd1cc7b7..adc322c4f4 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -244,6 +244,47 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ "你好!" > ./pred_qwen15.txt
+  cpp-beam_search_causal_lm-Baichuan2-7B-Chat:
+    runs-on: ubuntu-20.04-16-cores
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: recursive
+    - uses: actions/setup-python@v4
+      with:
+        python-version: 3.8
+    - name: Install OpenVINO
+      run: |
+        mkdir ./ov/
+        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
+        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
+    - name: Download, convert and build
+      run: |
+        source ./ov/setupvars.sh
+        python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id baichuan-inc/Baichuan2-7B-Chat --output_dir ./Baichuan2-7B-Chat/ --precision FP16 &
+        cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
+        cmake --build ./build/ --config Release -j
+        wait
+    - name: Run and Compare
+      run: |
+        source ./ov/setupvars.sh
+        convert_tokenizer ./Baichuan2-7B-Chat/pytorch/dldt/FP16/ --output ./Baichuan2-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
+        timeout 50s ./build/beam_search_causal_lm ./Baichuan2-7B-Chat/pytorch/dldt/FP16/ "69" > ./pred_baichuan2.txt
+        python -c "
+        import transformers
+        with open('pred_baichuan2.txt', 'r') as file:
+            predictions = file.read()
+
+        tokenizer = transformers.AutoTokenizer.from_pretrained('baichuan-inc/Baichuan2-7B-Chat',trust_remote_code=True)
+        tokenized = tokenizer('69', return_tensors='pt')
+        for beam in transformers.AutoModelForCausalLM.from_pretrained('baichuan-inc/Baichuan2-7B-Chat',trust_remote_code=True).generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+            ref = tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref=}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        "
+        echo 69 passed
   cpp-beam_search_causal_lm-Phi-2:
     runs-on: ubuntu-20.04-16-cores
     steps:
@@ -376,4 +417,4 @@ jobs:
             raise RuntimeError(f'Missing "{ref=}" from predictions')
         predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
-        echo Phi-1_5 passed
\ No newline at end of file
+        echo Phi-1_5 passed
diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md
index fe513a5b11..50bd46985b 100644
--- a/text_generation/causal_lm/cpp/README.md
+++ b/text_generation/causal_lm/cpp/README.md
@@ -134,14 +134,16 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro
    4. https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4

      [Qwen-7B-Chat-Int4 - Torch not compiled with CUDA enabled](../../../llm_bench/python/doc/NOTES.md#qwen-7b-chat-int4---torch-not-compiled-with-cuda-enabled) in case of `AssertionError`
-7. Dolly
+7. Baichuan
+   1. https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat
+8. Dolly
    1. https://huggingface.co/databricks/dolly-v2-3b
-8. Phi
+9. Phi
    1. https://huggingface.co/microsoft/phi-2
    2. https://huggingface.co/microsoft/phi-1_5
-9. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1)
-10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
-11. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+10. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1)
+11. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+12. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)

 This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.

From afd4784ba554641f81df4364ddac9f27eaf4d440 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Tue, 9 Apr 2024 00:41:09 +0400
Subject: [PATCH 2/2] Update causal_lm_cpp.yml

---
 .github/workflows/causal_lm_cpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 1184622641..b73b0289bf 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -282,7 +282,7 @@
     - name: Download, convert and build
       run: |
         source ./ov/setupvars.sh
-        python -m pip install --upgrade-strategy eager git+https://github.com/huggingface/optimum-intel.git@972491991710f8a92cdef35e0914de92a88995a4 -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id baichuan-inc/Baichuan2-7B-Chat --output_dir ./Baichuan2-7B-Chat/ --precision FP16 &
+        python -m pip install -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id baichuan-inc/Baichuan2-7B-Chat --output_dir ./Baichuan2-7B-Chat/ --precision FP16 &
         cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
         cmake --build ./build/ --config Release -j
         wait
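Note: the "Run and Compare" step added in PATCH 1/2 embeds its whole reference check in a `python -c` one-liner, which is hard to read inside YAML. The standalone sketch below restates the same logic with comments, for rerunning the check outside CI. It assumes `transformers` and a CPU build of `torch` are installed, huggingface.co is reachable, and `./build/beam_search_causal_lm` has already written its output for the prompt "69" to `pred_baichuan2.txt`, as in the workflow; the file name `check_baichuan2.py` is illustrative and not part of the patch.

# check_baichuan2.py -- standalone restatement of the workflow's comparison step.
# Usage: python check_baichuan2.py  (after running the C++ sample as above).
import transformers

with open('pred_baichuan2.txt', 'r') as file:
    predictions = file.read()

tokenizer = transformers.AutoTokenizer.from_pretrained(
    'baichuan-inc/Baichuan2-7B-Chat', trust_remote_code=True)
tokenized = tokenizer('69', return_tensors='pt')
model = transformers.AutoModelForCausalLM.from_pretrained(
    'baichuan-inc/Baichuan2-7B-Chat', trust_remote_code=True)

# Reproduce the C++ sample's diverse beam search settings so the Hugging Face
# reference beams are directly comparable with its output.
beams = model.generate(
    **tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15,
    diversity_penalty=1.0, max_new_tokens=20, early_stopping=False,
    length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False)

for beam in beams:
    # Decode only the continuation; the prompt tokens are sliced off.
    ref = tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
    idx = predictions.find(ref)
    if -1 == idx:
        raise RuntimeError(f'Missing "{ref=}" from predictions')
    # Remove the matched span so one region of the C++ output cannot
    # satisfy two different reference beams.
    predictions = predictions[:idx] + predictions[idx + len(ref):]
print('69 passed')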