diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 861be05a12..e58d4e67ee 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -192,6 +192,32 @@ jobs: source ./ov/setupvars.sh convert_tokenizer ./Qwen-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/pytorch/dldt/FP16/ 69 > ./pred.txt + cpp-beam_search_causal_lm-Qwen1_5-7B-Chat: + runs-on: ubuntu-20.04-16-cores + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + sudo ./ov/install_dependencies/install_openvino_dependencies.sh + - name: Download, convert and build + run: | + source ./ov/setupvars.sh + python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id Qwen/Qwen1.5-7B-Chat --output_dir ./Qwen1.5-7B-Chat/ --precision FP16 & + cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ + cmake --build ./build/ --config Release -j + wait + - name: Run + run: | + source ./ov/setupvars.sh + convert_tokenizer ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ "你好!" > ./pred_qwen15.txt cpp-beam_search_causal_lm-Phi-2: runs-on: ubuntu-20.04-16-cores steps: diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md index 78668ae67c..0ad2ffe928 100644 --- a/text_generation/causal_lm/cpp/README.md +++ b/text_generation/causal_lm/cpp/README.md @@ -130,6 +130,8 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro 6. Qwen 1. https://huggingface.co/Qwen/Qwen-7B-Chat 2. https://huggingface.co/Qwen/Qwen-7B-Chat-Int4 - refer to + 3. https://huggingface.co/Qwen/Qwen1.5-7B-Chat + 4. https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4 [Qwen-7B-Chat-Int4 - Torch not compiled with CUDA enabled](../../../llm_bench/python/doc/NOTES.md#qwen-7b-chat-int4---torch-not-compiled-with-cuda-enabled) in case of `AssertionError` 7. Dolly