diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index e26ceefa66..527259f203 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -13,9 +13,9 @@ concurrency: cancel-in-progress: true env: - l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/l_openvino_toolkit_ubuntu20_2024.3.0.dev20240708_x86_64.tgz - m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/m_openvino_toolkit_macos_12_6_2024.3.0.dev20240708_x86_64.tgz - w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/w_openvino_toolkit_windows_2024.3.0.dev20240708_x86_64.zip + l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.3.0.dev20240711_x86_64.tgz + m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/macos/m_openvino_toolkit_macos_12_6_2024.3.0.dev20240711_x86_64.tgz + w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/windows/w_openvino_toolkit_windows_2024.3.0.dev20240711_x86_64.zip jobs: cpp-multinomial-greedy_causal_lm-ubuntu: runs-on: ubuntu-20.04-8-cores @@ -34,8 +34,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -77,8 +77,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -210,8 +210,8 @@ jobs: - name: Download, convert and build run: | call .\ov\setupvars.bat - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -255,8 +255,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -282,8 +282,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen1.5-7B-Chat Qwen1.5-7B-Chat cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -310,8 +310,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-2 phi-2 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j 15 @@ -338,8 +338,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model argilla/notus-7b-v1 notus-7b-v1 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -366,8 +366,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ @@ -403,8 +403,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -447,8 +447,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-1_5 phi-1_5 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j 15 @@ -495,8 +495,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -545,8 +545,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -605,8 +605,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -649,8 +649,8 @@ jobs: - name: Install dependencies and build run: | call .\ov\setupvars.bat - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j @@ -688,8 +688,8 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j diff --git a/.github/workflows/genai_package.yml b/.github/workflows/genai_package.yml index 9e439eb11e..cf604b4bcc 100644 --- a/.github/workflows/genai_package.yml +++ b/.github/workflows/genai_package.yml @@ -5,9 +5,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} cancel-in-progress: true env: - l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/l_openvino_toolkit_ubuntu20_2024.3.0.dev20240708_x86_64.tgz - m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/m_openvino_toolkit_macos_12_6_2024.3.0.dev20240708_x86_64.tgz - w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/w_openvino_toolkit_windows_2024.3.0.dev20240708_x86_64.zip + l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.3.0.dev20240711_x86_64.tgz + m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/macos/m_openvino_toolkit_macos_12_6_2024.3.0.dev20240711_x86_64.tgz + w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/windows/w_openvino_toolkit_windows_2024.3.0.dev20240711_x86_64.zip jobs: ubuntu_genai_package: strategy: @@ -28,8 +28,8 @@ jobs: - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/ - run: source ./ov/setupvars.sh && cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j - - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - - run: source ./ov/setupvars.sh && python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + - run: source ./ov/setupvars.sh && python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release - run: source ./ov/setupvars.sh && optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 - run: source ./ov/setupvars.sh && cmake --install ./build/ --config ${{ matrix.build-type }} --prefix ov - run: ov/samples/cpp/build_samples.sh -i ${{ github.workspace }}/s\ pace @@ -57,8 +57,8 @@ jobs: - run: brew install coreutils scons - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/ - run: source ./ov/setupvars.sh && cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j - - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - - run: source ./ov/setupvars.sh && python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + - run: source ./ov/setupvars.sh && python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release - run: source ./ov/setupvars.sh && optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 - run: source ./ov/setupvars.sh && cmake --install ./build/ --config ${{ matrix.build-type }} --prefix ov - run: ov/samples/cpp/build_samples.sh -i ${{ github.workspace }}/s\ pace @@ -100,8 +100,8 @@ jobs: shell: bash - run: call ov\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/ - run: call ov\setupvars.bat && cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j - - run: call ov\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - - run: call ov\setupvars.bat && python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + - run: call ov\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + - run: call ov\setupvars.bat && python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release - run: call ov\setupvars.bat && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 - run: call ov\setupvars.bat && cmake --install ./build/ --config ${{ matrix.build-type }} --prefix ov - run: call ov\samples\cpp\build_samples_msvc.bat -i "${{ github.workspace }}/samples_install" diff --git a/.github/workflows/genai_python_lib.yml b/.github/workflows/genai_python_lib.yml index 38552174b6..141c379da6 100644 --- a/.github/workflows/genai_python_lib.yml +++ b/.github/workflows/genai_python_lib.yml @@ -5,9 +5,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} cancel-in-progress: true env: - l_ov_centos_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/l_openvino_toolkit_centos7_2024.3.0.dev20240708_x86_64.tgz - m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/m_openvino_toolkit_macos_12_6_2024.3.0.dev20240708_x86_64.tgz - w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/w_openvino_toolkit_windows_2024.3.0.dev20240708_x86_64.zip + l_ov_centos_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/linux/l_openvino_toolkit_centos7_2024.3.0.dev20240711_x86_64.tgz + m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/macos/m_openvino_toolkit_macos_12_6_2024.3.0.dev20240711_x86_64.tgz + w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.3.0rc1/windows/w_openvino_toolkit_windows_2024.3.0.dev20240711_x86_64.zip jobs: ubuntu_genai_python_lib: # A tokenizers' dependency fails to compile on ubuntu-20 n CenOS7 env. @@ -29,7 +29,7 @@ jobs: - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j - - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager + - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --upgrade-strategy eager - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/ - run: source ./ov/setupvars.sh && python -m pip install . --verbose - run: python -m pytest ./tests/python_tests/ @@ -52,7 +52,7 @@ jobs: - run: brew install coreutils scons - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j - - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager + - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --upgrade-strategy eager - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/ - run: source ./ov/setupvars.sh && python -m pip install . --verbose - run: python -c "from openvino_genai import LLMPipeline" @@ -79,12 +79,9 @@ jobs: unzip -d ov ov.zip dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}" shell: bash - - name: Install dependencies and build - run: | - call .\ov\setupvars.bat - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager - cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ - cmake --build ./build/ --config Release -j + - run: call ./ov/setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + - run: call ./ov/setupvars.bat && cmake --build ./build/ --config Release -j + - run: call ./ov/setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --upgrade-strategy eager # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that. - run: set "PYTHONPATH=./build/" && call ./ov/setupvars.bat && python -m pytest ./tests/python_tests/ - run: call ./ov/setupvars.bat && python -m pip install . --verbose diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 2d450ad9c8..82a74f8cdf 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -50,8 +50,8 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install -r ../../requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} @@ -95,8 +95,8 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install -r ../../requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index cda567c23b..5197b27da8 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -49,8 +49,8 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release + python -m pip install -r ../../requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f7390f981..27ed56b453 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,11 +57,27 @@ if(ENABLE_PYTHON) endif() endif() +if(ENABLE_PYTHON) + # the following two calls are required for cross-compilation + if(OpenVINODeveloperPackage_DIR) + ov_find_python3(REQUIRED) + ov_detect_python_module_extension() + else() + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module) + else() + find_package(Python3 REQUIRED COMPONENTS Interpreter Development) + endif() + endif() +endif() + add_subdirectory(thirdparty) add_subdirectory(src) add_subdirectory(samples) add_subdirectory(tests/cpp) +install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI) +install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt) install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI) install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt) set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) diff --git a/samples/cpp/beam_search_causal_lm/README.md b/samples/cpp/beam_search_causal_lm/README.md index 2d7c0a69d8..0d2ee83bfc 100644 --- a/samples/cpp/beam_search_causal_lm/README.md +++ b/samples/cpp/beam_search_causal_lm/README.md @@ -1,4 +1,4 @@ -# Text generation C++ sample that supports most popular models like LLaMA 2 +# Text generation C++ sample that supports most popular models like LLaMA 3 This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. It's only possible to change the device for inference to a differnt one, GPU for example, from the command line interface. The sample fearures `ov::genai::LLMPipeline` and configures it to use multiple beam grops. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) which provides an example of LLM-powered Chatbot in Python. diff --git a/samples/cpp/chat_sample/README.md b/samples/cpp/chat_sample/README.md index 2d39077c66..a2eccb4d3d 100644 --- a/samples/cpp/chat_sample/README.md +++ b/samples/cpp/chat_sample/README.md @@ -1,4 +1,4 @@ -# C++ chat_sample that supports most popular models like LLaMA 2 +# C++ chat_sample that supports most popular models like LLaMA 3 This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample fearures `ov::genai::LLMPipeline` and configures it for the chat scenario. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) which provides an example of LLM-powered Chatbot in Python. diff --git a/samples/cpp/greedy_causal_lm/README.md b/samples/cpp/greedy_causal_lm/README.md index 932f3d6ec5..79852e0d10 100644 --- a/samples/cpp/greedy_causal_lm/README.md +++ b/samples/cpp/greedy_causal_lm/README.md @@ -1,4 +1,4 @@ -# Text generation C++ greedy_causal_lm that supports most popular models like LLaMA 2 +# Text generation C++ greedy_causal_lm that supports most popular models like LLaMA 3 This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample fearures `ov::genai::LLMPipeline` and configures it to run the simplest deterministic greedy sampling algorithm. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) which provides an example of LLM-powered Chatbot in Python. diff --git a/samples/cpp/multinomial_causal_lm/CMakeLists.txt b/samples/cpp/multinomial_causal_lm/CMakeLists.txt index efcac50f09..98bc76ee3c 100644 --- a/samples/cpp/multinomial_causal_lm/CMakeLists.txt +++ b/samples/cpp/multinomial_causal_lm/CMakeLists.txt @@ -11,7 +11,7 @@ set_target_properties(multinomial_causal_lm PROPERTIES COMPILE_PDB_NAME multinomial_causal_lm # Ensure out of box LC_RPATH on macOS with SIP INSTALL_RPATH_USE_LINK_PATH ON) -target_compile_features(greedy_causal_lm PRIVATE cxx_std_11) +target_compile_features(multinomial_causal_lm PRIVATE cxx_std_11) install(TARGETS multinomial_causal_lm RUNTIME DESTINATION samples_bin/ COMPONENT samples_bin diff --git a/samples/cpp/multinomial_causal_lm/README.md b/samples/cpp/multinomial_causal_lm/README.md index 1642b61856..21c9a07e77 100644 --- a/samples/cpp/multinomial_causal_lm/README.md +++ b/samples/cpp/multinomial_causal_lm/README.md @@ -1,4 +1,4 @@ -# Text generation C++ multinomial_causal_lm that supports most popular models like LLaMA 2 +# Text generation C++ multinomial_causal_lm that supports most popular models like LLaMA 3 This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample fearures `ov::genai::LLMPipeline` and configures it to run random sampling algorithm. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) which provides an example of LLM-powered Chatbot in Python. diff --git a/samples/cpp/prompt_lookup_decoding_lm/README.md b/samples/cpp/prompt_lookup_decoding_lm/README.md index 398fdd49b8..c5517c5bf6 100644 --- a/samples/cpp/prompt_lookup_decoding_lm/README.md +++ b/samples/cpp/prompt_lookup_decoding_lm/README.md @@ -1,4 +1,4 @@ -# prompt_lookup_decoding_lm C++ sample that supports most popular models like LLaMA 2 +# prompt_lookup_decoding_lm C++ sample that supports most popular models like LLaMA 3 [Prompt Lookup decoding](https://github.com/apoorvumang/prompt-lookup-decoding) is [assested-generation](https://huggingface.co/blog/assisted-generation#understanding-text-generation-latency) technique where the draft model is replaced with simple string matching the prompt to generate candidate token sequences. This method highly effective for input grounded generation (summarization, document QA, multi-turn chat, code editing), where there is high n-gram overlap between LLM input (prompt) and LLM output. This could be entity names, phrases, or code chunks that the LLM directly copies from the input while generating the output. Prompt lookup exploits this pattern to speed up autoregressive decoding in LLMs. This results in significant speedups with no effect on output quality. diff --git a/samples/cpp/speculative_decoding_lm/README.md b/samples/cpp/speculative_decoding_lm/README.md index 480a14d762..644ebd2c94 100644 --- a/samples/cpp/speculative_decoding_lm/README.md +++ b/samples/cpp/speculative_decoding_lm/README.md @@ -1,4 +1,4 @@ -# speculative_decoding_lm C++ sample that supports most popular models like LLaMA 2 +# speculative_decoding_lm C++ sample that supports most popular models like LLaMA 3 Speculative decoding (or [assisted-generation](https://huggingface.co/blog/assisted-generation#understanding-text-generation-latency) in HF terminology) is a recent technique, that allows to speed up token generation when an additional smaller draft model is used alonside with the main model. diff --git a/samples/python/beam_search_causal_lm/README.md b/samples/python/beam_search_causal_lm/README.md index ff5286d010..5e80aa69da 100644 --- a/samples/python/beam_search_causal_lm/README.md +++ b/samples/python/beam_search_causal_lm/README.md @@ -1,4 +1,4 @@ -# Text generation Python sample that supports most popular models like LLaMA 2 +# Text generation Python sample that supports most popular models like LLaMA 3 This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. It's only possible to change the device for inference to a differnt one, GPU for example, from the command line interface. The sample fearures `openvino_genai.LLMPipeline` and configures it to use multiple beam grops. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) which provides an example of LLM-powered Chatbot in Python. diff --git a/samples/python/chat_sample/README.md b/samples/python/chat_sample/README.md index 34d71fab8a..983789d0eb 100644 --- a/samples/python/chat_sample/README.md +++ b/samples/python/chat_sample/README.md @@ -1,4 +1,4 @@ -# Python chat_sample that supports most popular models like LLaMA 2 +# Python chat_sample that supports most popular models like LLaMA 3 This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample fearures `openvino_genai.LLMPipeline` and configures it for the chat scenario. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) which provides an example of LLM-powered Chatbot in Python. diff --git a/samples/python/greedy_causal_lm/README.md b/samples/python/greedy_causal_lm/README.md index 7c87b04aad..97b044eb51 100644 --- a/samples/python/greedy_causal_lm/README.md +++ b/samples/python/greedy_causal_lm/README.md @@ -1,4 +1,4 @@ -# Text generation Python greedy_causal_lm that supports most popular models like LLaMA 2 +# Text generation Python greedy_causal_lm that supports most popular models like LLaMA 3 This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample fearures `openvino_genai.LLMPipeline` and configures it to run the simplest deterministic greedy sampling algorithm. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) which provides an example of LLM-powered Chatbot in Python. diff --git a/samples/python/multinomial_causal_lm/README.md b/samples/python/multinomial_causal_lm/README.md index d76b933663..d39142f3de 100644 --- a/samples/python/multinomial_causal_lm/README.md +++ b/samples/python/multinomial_causal_lm/README.md @@ -1,4 +1,4 @@ -# Text generation Python multinomial_causal_lm that supports most popular models like LLaMA 2 +# Text generation Python multinomial_causal_lm that supports most popular models like LLaMA 3 This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample fearures `ov::genai::LLMPipeline` and configures it to run random sampling algorithm. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) which provides an example of LLM-powered Chatbot in Python. diff --git a/src/README.md b/src/README.md index c67a60eaec..445b88aa58 100644 --- a/src/README.md +++ b/src/README.md @@ -23,7 +23,7 @@ To build OpenVINO™ GenAI library from source, refer to the [Build Instructions > git clone --recursive https://github.com/openvinotoolkit/openvino.genai.git > cd openvino.genai > # Install python dependencies - > python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + > python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release > python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt > ``` diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 454c53b944..c140bf9ac7 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -103,7 +103,8 @@ install(TARGETS ${TARGET_NAME} EXPORT OpenVINOGenAITargets install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION runtime/include COMPONENT core_genai_dev) install(EXPORT OpenVINOGenAITargets FILE OpenVINOGenAITargets.cmake - NAMESPACE openvino:: DESTINATION runtime/cmake) + NAMESPACE openvino:: DESTINATION runtime/cmake + COMPONENT core_genai_dev) include(CMakePackageConfigHelpers) configure_package_config_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/OpenVINOGenAIConfig.cmake.in" diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp index 9b4a206a1e..ac6b925dcb 100644 --- a/src/cpp/src/tokenizer.cpp +++ b/src/cpp/src/tokenizer.cpp @@ -98,8 +98,11 @@ class Tokenizer::TokenizerImpl { device).create_infer_request(); // Get special token ids by inference if they are not defined. - // todo: do not call until CVS-143410 is resolved - // infer_special_tokens_if_necessary(); + infer_special_tokens_if_necessary(); + // Initialize tokenizer's cache to save time later. + // infer_special_tokens_if_necessary() already could do that + // but it didn't run decode() for sure. + decode(encode("").input_ids); } // load special tokens ids from config.json diff --git a/src/docs/BUILD.md b/src/docs/BUILD.md index 710428139e..3b89995dc2 100644 --- a/src/docs/BUILD.md +++ b/src/docs/BUILD.md @@ -1,5 +1,8 @@ # How to Build OpenVINO™ GenAI +> **NOTE**: There is a known Python API issue with `ov::Tensor`. The issue is reproduced when building OpenVINO GenAI from sources while using OpenVINO from archives. Using `ov::Tensor` with OpenVINO GenAI fails. Possible errors: `TypeError: generate(): incompatible function arguments.`, `TypeError: __init__(): incompatible constructor arguments.`, `TypeError: Unregistered type : ov::Tensor`. +The preferred approach is to build both OpenVINO and OpenVINO GenAI from sources using the same build environment. Or to install prebuilt OpenVINO GenAI from [distribution channels](https://docs.openvino.ai/2024/get-started/install-openvino.html). + ## Build for Linux Systems ### Software Requirements @@ -10,20 +13,16 @@ ### Build Instructions -1. Clone OpenVINO GenAI repository and init submodules: +1. Build and install OpenVINO from sources following the [instructions](https://github.com/openvinotoolkit/openvino/wiki#how-to-build). +The path to the openvino install directory is referred as throughout the document. +2. Clone OpenVINO GenAI repository and init submodules: ```sh git clone --recursive https://github.com/openvinotoolkit/openvino.genai.git cd openvino.genai ``` -2. Download OpenVINO archive and install dependencies: - ```sh - mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15805-6138d624dc1/l_openvino_toolkit_ubuntu20_2024.3.0.dev20240626_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz - sudo ./ov/install_dependencies/install_openvino_dependencies.sh - ``` 3. Build the project: ```sh - source ./ov/setupvars.sh + source /setupvars.sh cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --target package -j cmake --install ./build/ --config Release --prefix ov @@ -40,21 +39,16 @@ ### Build Instructions -1. Clone OpenVINO GenAI repository and init submodules: +1. Build and install OpenVINO from sources following the [instructions](https://github.com/openvinotoolkit/openvino/wiki#how-to-build) +The path to the openvino install directory is referred as throughout the document. +2. Clone OpenVINO GenAI repository and init submodules: ```sh git clone --recursive https://github.com/openvinotoolkit/openvino.genai.git cd openvino.genai ``` -2. Download OpenVINO archive and install dependencies: - ```sh - mkdir ./ov/ - curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15805-6138d624dc1/w_openvino_toolkit_windows_2024.3.0.dev20240626_x86_64.zip - unzip ov.zip - mklink /D ov w_openvino_toolkit_windows_2024.3.0.dev20240626_x86_64 - ``` 3. Build the project: ```sh - call ov\setupvars.bat + call \setupvars.bat cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --target package -j cmake --install ./build/ --config Release --prefix ov @@ -77,19 +71,16 @@ ### Build Instructions -1. Clone OpenVINO GenAI repository and init submodules: +1. Build and install OpenVINO from sources following the [instructions](https://github.com/openvinotoolkit/openvino/wiki#how-to-build) +The path to the openvino install directory is referred as throughout the document. +2. Clone OpenVINO GenAI repository and init submodules: ```sh git clone --recursive https://github.com/openvinotoolkit/openvino.genai.git cd openvino.genai ``` -2. Download OpenVINO archive and install dependencies: - ```sh - mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15805-6138d624dc1/l_openvino_toolkit_ubuntu20_2024.3.0.dev20240626_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz - ``` 3. Build the project: ```sh - source ./ov/setupvars.sh + source /setupvars.sh cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --target package -j cmake --install ./build/ --config Release --prefix ov diff --git a/src/docs/SUPPORTED_MODELS.md b/src/docs/SUPPORTED_MODELS.md index 0e6099db03..3eb2af17b4 100644 --- a/src/docs/SUPPORTED_MODELS.md +++ b/src/docs/SUPPORTED_MODELS.md @@ -45,7 +45,19 @@ - LlamaForCausalLM + LlamaForCausalLM + Llama 3 + + + + + + Llama 2
    diff --git a/tests/python_tests/README.md b/tests/python_tests/README.md new file mode 100644 index 0000000000..e5381708de --- /dev/null +++ b/tests/python_tests/README.md @@ -0,0 +1,47 @@ +# OpenVINO™ GenAI Tests + +This tests aim to validate support for vanilla and continuous batching GenAI APIs. + +## Setup environemnt + +In order to run tests first of all build or install OpenVINO GenAI library, follow instructions [GenAI Library README](../../src/README.md). + +Then install requirements for tests: +```sh +pip install -r tests/python_tests/requirements.txt +``` + +## Run Tests + +```sh +python -m pytest tests/python_tests/ -m precommit +``` + +During the test downloaded HuggingFace (HF) models will be saved into the current directory. If you wish to place them somewhere else you can specify `GENAI_MODELS_PATH_PREFIX` environenment variable, e.g. +```sh +GENAI_MODELS_PATH_PREFIX=$HOME/test_models python -m pytest tests/python_tests/ -m precommit +``` + +If you have built GenAI library by yourself instead of using wheel please set `PYTHONPATH` so that test could find library, e.g. +```sh +PYTHONPATH=$PYTHONPATH:.../openvino.genai/build-Release/ python -m pytest tests/python_tests/ -m precommit +``` + +## Customise tests run + +Tests have `precommit` and `nightly` set of models. `precommit` contains lightweight models which can be quickly inferred, `nightly` models are heavier and required more time for interence. If you wish to run specific tests only for nightly models, you can use `-k` option, for example to run only multibatch and chat tests: +```sh +python -m pytest tests/python_tests/ -m nightly -k "test_multibatch and test_chat" +``` + +If you wish to run all tests except beam search do the following: +```sh +python -m pytest tests/python_tests/ -m precommit -k "not test_beam_search" +``` + +Argument `--model_ids` can be used to run tests selectively only for specific models. HF model ids should be separated by space, e.g: +```sh +python -m pytest tests/python_tests/ -m nightly -k "test_multibatch" --model_ids "TinyLlama/TinyLlama-1.1B-Chat-v1.0 Qwen/Qwen2-0.5B-Instruct" +``` + +List of currently supported `nightly` and `precommit` models can be found in tests/python_tests/ov_genai_test_utils.py:get_models_list diff --git a/tests/python_tests/conftest.py b/tests/python_tests/conftest.py index 66212468af..f98f47ecf3 100644 --- a/tests/python_tests/conftest.py +++ b/tests/python_tests/conftest.py @@ -14,6 +14,11 @@ def pytest_make_parametrize_id(config, val, argname): return f'{argname}={val}' return None -def pytest_configure(config): +def pytest_addoption(parser): + parser.addoption("--model_ids", help="Select models to run") + +def pytest_configure(config: pytest.Config): marker = 'precommit' if config.getoption('-m') == 'precommit' else 'nightly' pytest.run_marker = marker + pytest.selected_model_ids = config.getoption('--model_ids', default=None) + diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py index 7bceb29458..7560486d42 100644 --- a/tests/python_tests/ov_genai_test_utils.py +++ b/tests/python_tests/ov_genai_test_utils.py @@ -49,7 +49,10 @@ def get_models_list(): model_ids = precommit_models else: model_ids = nightly_models - + + if pytest.selected_model_ids: + model_ids = [model_id for model_id in model_ids if model_id in pytest.selected_model_ids.split(' ')] + # pytest.set_trace() prefix = pathlib.Path(os.getenv('GENAI_MODELS_PATH_PREFIX', '')) return [(model_id, prefix / model_id.split('/')[1]) for model_id in model_ids] diff --git a/tests/python_tests/test_chat_generate_api.py b/tests/python_tests/test_chat_generate_api.py index 94de8f6cc2..5a73d481d3 100644 --- a/tests/python_tests/test_chat_generate_api.py +++ b/tests/python_tests/test_chat_generate_api.py @@ -33,6 +33,7 @@ @pytest.mark.parametrize("generation_config", configs) @pytest.mark.parametrize("model_descr", get_chat_models_list()) @pytest.mark.precommit +@pytest.mark.nightly def test_chat_compare_with_HF(model_descr, generation_config: Dict): device = 'CPU' chat_history_hf = [] @@ -69,6 +70,7 @@ def test_chat_compare_with_HF(model_descr, generation_config: Dict): @pytest.mark.parametrize("generation_config", configs) @pytest.mark.parametrize("model_descr", get_chat_models_list()) @pytest.mark.precommit +@pytest.mark.nightly def test_chat_compare_text_history_with_HF(model_descr, generation_config: Dict): # compares with HF when history in ov_genai is save as a text device = 'CPU' @@ -104,6 +106,7 @@ def test_chat_compare_text_history_with_HF(model_descr, generation_config: Dict) @pytest.mark.parametrize("generation_config", configs) @pytest.mark.parametrize("model_descr", get_chat_models_list()) @pytest.mark.precommit +@pytest.mark.nightly def test_chat_compare_statefull_vs_text_history(model_descr, generation_config: Dict): # Check that when history is stored in KV cache results are the same as when history stored in a text. device ='CPU' @@ -144,6 +147,7 @@ def test_chat_compare_statefull_vs_text_history(model_descr, generation_config: {'role': 'user', 'content': 'What was my first question?'}, ] @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.parametrize('chat_config', get_chat_templates()) def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict]): tokenizer_config = chat_config[1] diff --git a/tests/python_tests/test_generate_api.py b/tests/python_tests/test_generate_api.py index 40bc121293..b4e275eef2 100644 --- a/tests/python_tests/test_generate_api.py +++ b/tests/python_tests/test_generate_api.py @@ -151,6 +151,7 @@ def hf_ov_genai_tensors_comparison( @pytest.mark.parametrize("generation_config,prompt", test_cases) @pytest.mark.parametrize("model_descr", get_models_list()) @pytest.mark.precommit +@pytest.mark.nightly def test_decoding(model_descr, generation_config, prompt): run_hf_ov_genai_comparison(read_model(model_descr), generation_config, prompt) @@ -168,6 +169,7 @@ def test_decoding(model_descr, generation_config, prompt): condition=sys.platform in ["linux", "win32"] ) @pytest.mark.precommit +@pytest.mark.nightly def test_ov_tensors(model_descr, inputs): hf_ov_genai_tensors_comparison(read_model(model_descr), dict(max_new_tokens=20), *inputs) @@ -182,6 +184,7 @@ def test_ov_tensors(model_descr, inputs): @pytest.mark.parametrize("model_descr", get_models_list()) @pytest.mark.parametrize("prompt", prompts) @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.xfail( raises=TypeError, reason="pybind was unable to find ov::Tensor from openvino yet", @@ -217,6 +220,7 @@ def test_genai_tokenizer_encode(model_descr, prompt): @pytest.mark.parametrize("model_descr", get_models_list()) @pytest.mark.parametrize("encoded_prompt", encoded_prompts) @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.xfail( raises=TypeError, reason="pybind was unable to find ov::Tensor from openvino yet", @@ -252,6 +256,7 @@ def test_genai_tokenizer_decode(model_descr, encoded_prompt): @pytest.mark.parametrize("prompts", batched_prompts) @pytest.mark.parametrize("model_descr", get_models_list()) @pytest.mark.precommit +@pytest.mark.nightly def test_multibatch(model_descr, generation_config, prompts): run_hf_ov_genai_comparison_batched(read_model(model_descr), generation_config, prompts) @@ -264,6 +269,7 @@ def test_multibatch(model_descr, generation_config, prompts): @pytest.mark.parametrize("prompt", prompts) @pytest.mark.parametrize("model_descr", get_models_list()) @pytest.mark.precommit +@pytest.mark.nightly def test_beam_search_decoding(model_descr, num_beam_groups, group_size, max_new_tokens, diversity_penalty, prompt): generation_config = dict( @@ -281,6 +287,7 @@ def test_beam_search_decoding(model_descr, num_beam_groups, group_size, @pytest.mark.parametrize("max_new_tokens", [10, 80]) @pytest.mark.parametrize("model_descr", get_models_list()) @pytest.mark.precommit +@pytest.mark.nightly def test_stop_criteria(model_descr, stop_criteria, prompt, max_new_tokens): # todo: with EARLY stop_criteria looks like HF return unvalid out with sentence # while genai ends sentence with @@ -323,6 +330,7 @@ def user_defined_callback(subword): @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)]) @pytest.mark.precommit +@pytest.mark.nightly def test_callback_one_string(callback): pipe = read_model(get_models_list()[0])[4] generation_config = pipe.get_generation_config() @@ -332,6 +340,7 @@ def test_callback_one_string(callback): @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)]) @pytest.mark.precommit +@pytest.mark.nightly def test_callback_batch_fail(callback): pipe = read_model(get_models_list()[0])[4] with pytest.raises(RuntimeError): @@ -340,12 +349,14 @@ def test_callback_batch_fail(callback): @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)]) @pytest.mark.precommit +@pytest.mark.nightly def test_callback_kwargs_one_string(callback): pipe = read_model(get_models_list()[0])[4] pipe.generate('table is made of', max_new_tokens=10, streamer=callback) @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)]) @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.parametrize("model_descr", get_models_list()) def test_callback_decoding_metallama(model_descr, callback): # On metallam this prompt generates output which can shorten after adding new tokens. @@ -359,6 +370,7 @@ def test_callback_decoding_metallama(model_descr, callback): @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)]) @pytest.mark.precommit +@pytest.mark.nightly def test_callback_kwargs_batch_fail(callback): pipe = read_model(get_models_list()[0])[4] with pytest.raises(RuntimeError): @@ -380,6 +392,7 @@ def end(self): @pytest.mark.precommit +@pytest.mark.nightly def test_streamer_one_string(): pipe = read_model(get_models_list()[0])[4] generation_config = pipe.get_generation_config() @@ -389,6 +402,7 @@ def test_streamer_one_string(): @pytest.mark.precommit +@pytest.mark.nightly def test_streamer_batch_fail(): pipe = read_model(get_models_list()[0])[4] printer = Printer(pipe.get_tokenizer()) @@ -397,6 +411,7 @@ def test_streamer_batch_fail(): @pytest.mark.precommit +@pytest.mark.nightly def test_streamer_kwargs_one_string(): pipe = read_model(get_models_list()[0])[4] printer = Printer(pipe.get_tokenizer()) @@ -404,6 +419,7 @@ def test_streamer_kwargs_one_string(): @pytest.mark.precommit +@pytest.mark.nightly def test_streamer_kwargs_batch_fail(): pipe = read_model(get_models_list()[0])[4] printer = Printer(pipe.get_tokenizer()) @@ -412,6 +428,7 @@ def test_streamer_kwargs_batch_fail(): @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)]) def test_operator_with_callback_one_string(callback): pipe = read_model(get_models_list()[0])[4] @@ -421,6 +438,7 @@ def test_operator_with_callback_one_string(callback): @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)]) def test_operator_with_callback_batch_fail(callback): pipe = read_model(get_models_list()[0])[4] @@ -429,6 +447,7 @@ def test_operator_with_callback_batch_fail(callback): @pytest.mark.precommit +@pytest.mark.nightly def test_operator_with_streamer_kwargs_one_string(): pipe = read_model(get_models_list()[0])[4] printer = Printer(pipe.get_tokenizer()) @@ -436,6 +455,7 @@ def test_operator_with_streamer_kwargs_one_string(): @pytest.mark.precommit +@pytest.mark.nightly def test_operator_with_streamer_kwargs_batch_fail(): pipe = read_model(get_models_list()[0])[4] printer = Printer(pipe.get_tokenizer()) @@ -444,6 +464,7 @@ def test_operator_with_streamer_kwargs_batch_fail(): @pytest.mark.precommit +@pytest.mark.nightly def test_load_special_tokens_ids_1(model_tmp_path): # test when there is an available config.json config_json = { @@ -458,6 +479,7 @@ def test_load_special_tokens_ids_1(model_tmp_path): @pytest.mark.precommit +@pytest.mark.nightly def test_load_special_tokens_str_2(model_tmp_path): # test with special_tokens_map special_tokens_map_json = { @@ -472,6 +494,7 @@ def test_load_special_tokens_str_2(model_tmp_path): @pytest.mark.precommit +@pytest.mark.nightly def test_load_special_tokens_3_(model_tmp_path): # special_tokens_map is not available # but tokenize_config.json exists @@ -498,6 +521,7 @@ def test_load_special_tokens_3_(model_tmp_path): @pytest.mark.precommit +@pytest.mark.nightly def test_load_special_tokens_3(model_tmp_path): # both config.json is availabel and tokenizer_config.json available # check that it does not read int values from tokenizer_config.json if they are in config.json @@ -532,6 +556,7 @@ def test_load_special_tokens_3(model_tmp_path): @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.xfail( raises=AssertionError, reason="CVS-143410 ov tokenizer should be aligned with hf", @@ -575,6 +600,7 @@ def test_load_special_tokens_4(model_tmp_path): ] @pytest.mark.parametrize("generation_config", invalid_configs) @pytest.mark.precommit +@pytest.mark.nightly def test_invalid_configs(model_tmp_path, generation_config): model_id, temp_path = model_tmp_path config_json = {} @@ -584,6 +610,7 @@ def test_invalid_configs(model_tmp_path, generation_config): @pytest.mark.precommit +@pytest.mark.nightly def test_valid_configs(model_tmp_path): model_id, temp_path = model_tmp_path pipe = load_pipe([({"eos_token_id": 37}, "config.json")], temp_path) @@ -602,6 +629,7 @@ def test_valid_configs(model_tmp_path): dict(top_k=0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_k ] @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.parametrize("generation_config", invalid_py_configs) def test_python_generation_config_validation(model_tmp_path, generation_config): model_id, temp_path = model_tmp_path @@ -615,6 +643,7 @@ def test_python_generation_config_validation(model_tmp_path, generation_config): @pytest.mark.precommit +@pytest.mark.nightly def test_unicode_pybind_decoding_1(): # On this model this prompt generates unfinished utf string. # Test that pybind will not fail. @@ -626,6 +655,7 @@ def test_unicode_pybind_decoding_1(): @pytest.mark.precommit +@pytest.mark.nightly def test_unicode_pybind_decoding_2(): # On this model this prompt generates unfinished utf string. # Test that pybind will not fail. @@ -636,6 +666,7 @@ def test_unicode_pybind_decoding_2(): @pytest.mark.precommit +@pytest.mark.nightly def test_unicode_pybind_decoding_3(): # On this model this prompt generates unfinished utf-8 string # and streams it. Test that pybind will not fail while we pass string to python. @@ -648,6 +679,7 @@ def test_unicode_pybind_decoding_3(): @pytest.mark.skip(reason="probably both models ov + hf doesn't fit to memory") @pytest.mark.precommit +@pytest.mark.nightly @pytest.mark.skipif(sys.platform.startswith("win"), reason="not enough space for this model on Win") def test_left_pad(): # test left pad tokenizer post processing implementation