Merge remote-tracking branch 'upstream/master' into speculative_decoding

openvinotoolkit · Sep 6, 2024 · c39ff0a
2 parents 611c8bb + 0a53c91
commit c39ff0a
Show file tree

Hide file tree

Showing 31 changed files with 480 additions and 246 deletions.
diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml
@@ -16,8 +16,8 @@ permissions: read-all  # Required by https://github.com/ossf/scorecard/blob/e23b
 env:
   WORKING_DIRECTORY: "./image_generation/lcm_dreamshaper_v7/cpp/"
   PYTHON_VERSION: '3.8'
-  LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.4.0-16527-382ac845923/l_openvino_toolkit_ubuntu20_2024.4.0.dev20240828_x86_64.tgz
-  WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.4.0-16527-382ac845923/w_openvino_toolkit_windows_2024.4.0.dev20240828_x86_64.zip
+  LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16570-19eb02fe60b/l_openvino_toolkit_ubuntu20_2024.5.0.dev20240830_x86_64.tgz
+  WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16570-19eb02fe60b/w_openvino_toolkit_windows_2024.5.0.dev20240830_x86_64.zip
   OV_INSTALL_DIR: ${{ github.workspace }}/ov
 
 concurrency:
@@ -41,36 +41,36 @@ jobs:
           mkdir ${{ env.OV_INSTALL_DIR }}
           tar -xzf openvino_package.tar.gz -C ${{ env.OV_INSTALL_DIR }} --strip-components=1
 
+      - name: Build app
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+        run: |
+          source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release --parallel
+
       - name: Setup Python ${{ env.PYTHON_VERSION }}
         uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
           cache: 'pip'
-        
+
       - name: Create virtual environment
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: python3 -m venv openvino_lcm_cpp
-      
+
       - name: Install python dependencies
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: |
           source openvino_lcm_cpp/bin/activate
           python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
           python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-          
+
       - name: Download and convert model and tokenizer
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: |
           source openvino_lcm_cpp/bin/activate
           optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 models/lcm_dreamshaper_v7/FP16
 
-      - name: Build app
-        working-directory: ${{ env.WORKING_DIRECTORY }}
-        run: |
-          source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-          cmake --build ./build/ --config Release --parallel
-      
       - name: Run app
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: |
@@ -96,16 +96,23 @@ jobs:
             mv ./tmp/*/* .
           popd
 
+      - name: Build app
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+        run: |
+          . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release --parallel
+
       - name: Setup Python ${{ env.PYTHON_VERSION }}
         uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
           cache: 'pip'
-        
+
       - name: Create virtual environment
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: python -m venv openvino_lcm_cpp
-      
+
       - name: Install python dependencies
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: |
@@ -118,14 +125,7 @@ jobs:
         run: |
           . "./openvino_lcm_cpp/Scripts/Activate.ps1"
           optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 models/lcm_dreamshaper_v7/FP16
-  
-      - name: Build app
-        working-directory: ${{ env.WORKING_DIRECTORY }}
-        run: |
-          . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-          cmake --build ./build/ --config Release --parallel
-      
+
       - name: Run app
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: |

diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -16,8 +16,8 @@ permissions: read-all  # Required by https://github.com/ossf/scorecard/blob/e23b
 env:
   WORKING_DIRECTORY: "./image_generation/stable_diffusion_1_5/cpp/"
   PYTHON_VERSION: '3.8'
-  LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.4.0-16527-382ac845923/l_openvino_toolkit_ubuntu20_2024.4.0.dev20240828_x86_64.tgz
-  WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.4.0-16527-382ac845923/w_openvino_toolkit_windows_2024.4.0.dev20240828_x86_64.zip
+  LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16570-19eb02fe60b/l_openvino_toolkit_ubuntu20_2024.5.0.dev20240830_x86_64.tgz
+  WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16570-19eb02fe60b/w_openvino_toolkit_windows_2024.5.0.dev20240830_x86_64.zip
   OV_INSTALL_DIR: ${{ github.workspace }}/ov
 
 concurrency:
@@ -41,12 +41,19 @@ jobs:
           mkdir ${{ env.OV_INSTALL_DIR }}
           tar -xzf openvino_package.tar.gz -C ${{ env.OV_INSTALL_DIR }} --strip-components=1
 
+      - name: Build app
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+        run: |
+          source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release --parallel
+
       - name: Setup Python ${{ env.PYTHON_VERSION }}
         uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
           cache: 'pip'
-        
+
       - name: Create virtual environment
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: python3 -m venv openvino_sd_cpp
@@ -62,14 +69,7 @@ jobs:
         working-directory: ${{ env.WORKING_DIRECTORY }}
         run: |
           source openvino_sd_cpp/bin/activate
-          optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion models/stable_diffusion_v1_5_ov/FP16
-
-      - name: Build app
-        working-directory: ${{ env.WORKING_DIRECTORY }}
-        run: |
-          source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-          cmake --build ./build/ --config Release --parallel
+          optimum-cli export openvino --model botp/stable-diffusion-v1-5 --task stable-diffusion models/stable_diffusion_v1_5_ov/FP16
 
       - name: Run app
         working-directory: ${{ env.WORKING_DIRECTORY }}
@@ -95,37 +95,37 @@ jobs:
               Expand-Archive openvino_package.zip -DestinationPath ./tmp
               mv ./tmp/*/* .
             popd
-  
+
+        - name: Build app
+          working-directory: ${{ env.WORKING_DIRECTORY }}
+          run: |
+            . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
+            cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+            cmake --build ./build/ --config Release --parallel
+
         - name: Setup Python ${{ env.PYTHON_VERSION }}
           uses: actions/setup-python@v5
           with:
             python-version: ${{ env.PYTHON_VERSION }}
             cache: 'pip'
-          
+
         - name: Create virtual environment
           working-directory: ${{ env.WORKING_DIRECTORY }}
           run: python -m venv openvino_sd_cpp
-  
+
         - name: Install python dependencies
           working-directory: ${{ env.WORKING_DIRECTORY }}
           run: |
             . "./openvino_sd_cpp/Scripts/Activate.ps1"
             python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
             python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-  
+
         - name: Download and convert model and tokenizer
           working-directory: ${{ env.WORKING_DIRECTORY }}
           run: |
             . "./openvino_sd_cpp/Scripts/Activate.ps1"
-            optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion models/stable_diffusion_v1_5_ov/FP16
-  
-        - name: Build app
-          working-directory: ${{ env.WORKING_DIRECTORY }}
-          run: |
-            . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
-            cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-            cmake --build ./build/ --config Release --parallel
-  
+            optimum-cli export openvino --model botp/stable-diffusion-v1-5 --task stable-diffusion models/stable_diffusion_v1_5_ov/FP16
+
         - name: Run app
           working-directory: ${{ env.WORKING_DIRECTORY }}
           run: |

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -26,18 +26,18 @@ if(POLICY CMP0169)
 endif()
 
 project(OpenVINOGenAI
-        VERSION 2024.4.0.0
+        VERSION 2024.5.0.0
         DESCRIPTION "OpenVINO GenAI"
         HOMEPAGE_URL "https://github.com/openvinotoolkit/openvino.genai"
         LANGUAGES CXX)
 
 # Find OpenVINODeveloperPackage first to compile with SDL flags
 find_package(OpenVINODeveloperPackage ${OpenVINOGenAI_VERSION} QUIET
-             COMPONENTS Runtime Threading
+             COMPONENTS Runtime
              PATHS "${OpenVINO_DIR}")
 if(NOT OpenVINODeveloperPackage_FOUND)
     find_package(OpenVINO ${OpenVINOGenAI_VERSION} REQUIRED
-                 COMPONENTS Runtime Threading)
+                 COMPONENTS Runtime)
 endif()
 
 include(cmake/features.cmake)
@@ -56,6 +56,10 @@ if(ENABLE_PYTHON)
     endif()
 endif()
 
+if(WIN32 OR APPLE)
+  set(CMAKE_DEBUG_POSTFIX "d")
+endif()
+
 add_subdirectory(thirdparty)
 add_subdirectory(src)
 add_subdirectory(samples)

diff --git a/image_generation/lcm_dreamshaper_v7/cpp/CMakeLists.txt b/image_generation/lcm_dreamshaper_v7/cpp/CMakeLists.txt
@@ -15,7 +15,7 @@ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")
 
 # dependencies
 
-find_package(OpenVINO REQUIRED COMPONENTS Runtime Threading)
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
 
 include(FetchContent)
 

diff --git a/image_generation/requirements.txt b/image_generation/requirements.txt
@@ -1,2 +1,2 @@
 -r ../samples/requirements.txt
-diffusers==0.30.1
+diffusers==0.30.2
diff --git a/image_generation/stable_diffusion_1_5/cpp/CMakeLists.txt b/image_generation/stable_diffusion_1_5/cpp/CMakeLists.txt
@@ -15,7 +15,7 @@ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")
 
 # dependencies
 
-find_package(OpenVINO REQUIRED COMPONENTS Runtime Threading)
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
 
 include(FetchContent)
 

diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md
@@ -57,7 +57,7 @@ The path to the OpenVINO install directory is referred as `<INSTALL_DIR>` throug
 2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel).
 
     Example models to download:
-    - [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)
+    - [botp/stable-diffusion-v1-5](https://huggingface.co/botp/stable-diffusion-v1-5)
     - [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0)
 
     Example command for downloading [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) model and exporting it with FP16 precision:

diff --git a/llm_bench/python/benchmark.py b/llm_bench/python/benchmark.py
@@ -224,11 +224,6 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
             llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id)
     pt_inputs = tokenizer(input_text_list, return_tensors="pt")
     input_token_size = pt_inputs.input_ids.shape[1]
-    pipe_tokenizer = model.get_tokenizer()
-    tok_encode_start = time.perf_counter()
-    input_data = pipe_tokenizer.encode(input_text_list)
-    tok_encode_end = time.perf_counter()
-    tok_encode_time = (tok_encode_end - tok_encode_start) * 1000
     if args['batch_size'] > 1:
         out_str = '[warm-up]' if num == 0 else '[{}]'.format(num)
         out_str += " Batch_size={}, ".format(args['batch_size'])
@@ -243,21 +238,19 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
         mem_consumption.start_collect_memory_consumption()
     max_gen_tokens = DEFAULT_OUTPUT_TOKEN_SIZE if args['infer_count'] is None else args['infer_count']
-    streamer.reset()
     start = time.perf_counter()
-    generated_tokens = model.generate(input_data, max_new_tokens=max_gen_tokens, num_beams=args["num_beams"], streamer=streamer).tokens
+    generation_result = model.generate(input_text_list, max_new_tokens=max_gen_tokens, num_beams=args["num_beams"])
     end = time.perf_counter()
-    log.info(type(generated_tokens[0]))
+    generated_text = generation_result.texts
+    perf_metrics = generation_result.perf_metrics
+
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
         mem_consumption.end_collect_momory_consumption()
         max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
         mem_consumption.clear_max_memory_consumption()
 
     generation_time = end - start
-    tok_decode_start = time.perf_counter()
-    generated_text = pipe_tokenizer.decode(generated_tokens)
-    tok_decode_end = time.perf_counter()
-    tok_decode_time = (tok_decode_end - tok_decode_start) * 1000
+    generated_tokens = [tokenizer(text).input_ids for text in generated_text]
     # Only text_gen need to minus length of input_data, because generated_text may include input_text
     num_tokens = 0
     result_md5_list = []
@@ -275,9 +268,13 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
     else:
         md5_list[num][prompt_index] = result_md5_list
     per_token_time = generation_time * 1000 / (num_tokens / args['batch_size'])
-    tm_list = streamer.get_time_list()
+    tm_list = np.array(perf_metrics.raw_metrics.m_durations) / 1000 / 1000
     log.debug('latency of all tokens:')
     [log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_list)]
+    tokenization_time = (
+        np.mean(perf_metrics.raw_metrics.tokenization_durations) / 1000,
+        np.mean(perf_metrics.raw_metrics.detokenization_durations) / 1000
+    )
     iter_data = gen_iterate_data(
         num,
         input_token_size * args['batch_size'],
@@ -290,19 +287,19 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
         max_shared_mem=max_shared_mem_consumption,
         max_uss_mem=max_uss_mem_consumption,
         prompt_idx=prompt_index,
-        tokenization_time=(tok_encode_time, tok_decode_time)
+        tokenization_time=tokenization_time
     )
     iter_data_list.append(iter_data)
     llm_bench_utils.metrics_print.print_metrics(
         num,
         iter_data,
-        tm_list,
+        tm_list.tolist(),
         [],
         warm_up=(num == 0),
         max_rss_mem=max_rss_mem_consumption,
         max_shared_mem=max_shared_mem_consumption,
         max_uss_mem=max_uss_mem_consumption,
-        tokenization_time=(tok_encode_time, tok_decode_time),
+        tokenization_time=tokenization_time,
         batch_size=args['batch_size']
     )
     if num > 0:
@@ -320,7 +317,6 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
                 assert (result_md5_list == prev_md5)
     else:
         llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
-    streamer.reset()
 
 
 def run_text_generation_benchmark(model_path, framework, device, args, num_iters):
@@ -726,7 +722,8 @@ def get_argprser():
 
 
 def main():
-    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=os.environ.get("LOGLEVEL", log.INFO), stream=sys.stdout, encoding="utf-8")
+    logging_kwargs = {"encoding": "utf-8"} if sys.version_info[1] > 8 else {}
+    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=os.environ.get("LOGLEVEL", log.INFO), stream=sys.stdout, **logging_kwargs)
     args = get_argprser()
     model_path, framework, model_args, model_name = llm_bench_utils.model_utils.analyze_args(args)