Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into speculative_decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
iefode committed Sep 6, 2024
2 parents 611c8bb + 0a53c91 commit c39ff0a
Show file tree
Hide file tree
Showing 31 changed files with 480 additions and 246 deletions.
190 changes: 139 additions & 51 deletions .github/workflows/causal_lm_cpp.yml

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions .github/workflows/lcm_dreamshaper_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b
env:
WORKING_DIRECTORY: "./image_generation/lcm_dreamshaper_v7/cpp/"
PYTHON_VERSION: '3.8'
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.4.0-16527-382ac845923/l_openvino_toolkit_ubuntu20_2024.4.0.dev20240828_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.4.0-16527-382ac845923/w_openvino_toolkit_windows_2024.4.0.dev20240828_x86_64.zip
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16570-19eb02fe60b/l_openvino_toolkit_ubuntu20_2024.5.0.dev20240830_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16570-19eb02fe60b/w_openvino_toolkit_windows_2024.5.0.dev20240830_x86_64.zip
OV_INSTALL_DIR: ${{ github.workspace }}/ov

concurrency:
Expand All @@ -41,36 +41,36 @@ jobs:
mkdir ${{ env.OV_INSTALL_DIR }}
tar -xzf openvino_package.tar.gz -C ${{ env.OV_INSTALL_DIR }} --strip-components=1
- name: Build app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --parallel
- name: Setup Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
cache: 'pip'

- name: Create virtual environment
working-directory: ${{ env.WORKING_DIRECTORY }}
run: python3 -m venv openvino_lcm_cpp

- name: Install python dependencies
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
source openvino_lcm_cpp/bin/activate
python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
- name: Download and convert model and tokenizer
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
source openvino_lcm_cpp/bin/activate
optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 models/lcm_dreamshaper_v7/FP16
- name: Build app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --parallel
- name: Run app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
Expand All @@ -96,16 +96,23 @@ jobs:
mv ./tmp/*/* .
popd
- name: Build app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --parallel
- name: Setup Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
cache: 'pip'

- name: Create virtual environment
working-directory: ${{ env.WORKING_DIRECTORY }}
run: python -m venv openvino_lcm_cpp

- name: Install python dependencies
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
Expand All @@ -118,14 +125,7 @@ jobs:
run: |
. "./openvino_lcm_cpp/Scripts/Activate.ps1"
optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 models/lcm_dreamshaper_v7/FP16
- name: Build app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --parallel
- name: Run app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
Expand Down
48 changes: 24 additions & 24 deletions .github/workflows/stable_diffusion_1_5_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b
env:
WORKING_DIRECTORY: "./image_generation/stable_diffusion_1_5/cpp/"
PYTHON_VERSION: '3.8'
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.4.0-16527-382ac845923/l_openvino_toolkit_ubuntu20_2024.4.0.dev20240828_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.4.0-16527-382ac845923/w_openvino_toolkit_windows_2024.4.0.dev20240828_x86_64.zip
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16570-19eb02fe60b/l_openvino_toolkit_ubuntu20_2024.5.0.dev20240830_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16570-19eb02fe60b/w_openvino_toolkit_windows_2024.5.0.dev20240830_x86_64.zip
OV_INSTALL_DIR: ${{ github.workspace }}/ov

concurrency:
Expand All @@ -41,12 +41,19 @@ jobs:
mkdir ${{ env.OV_INSTALL_DIR }}
tar -xzf openvino_package.tar.gz -C ${{ env.OV_INSTALL_DIR }} --strip-components=1
- name: Build app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --parallel
- name: Setup Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
cache: 'pip'

- name: Create virtual environment
working-directory: ${{ env.WORKING_DIRECTORY }}
run: python3 -m venv openvino_sd_cpp
Expand All @@ -62,14 +69,7 @@ jobs:
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
source openvino_sd_cpp/bin/activate
optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion models/stable_diffusion_v1_5_ov/FP16
- name: Build app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --parallel
optimum-cli export openvino --model botp/stable-diffusion-v1-5 --task stable-diffusion models/stable_diffusion_v1_5_ov/FP16
- name: Run app
working-directory: ${{ env.WORKING_DIRECTORY }}
Expand All @@ -95,37 +95,37 @@ jobs:
Expand-Archive openvino_package.zip -DestinationPath ./tmp
mv ./tmp/*/* .
popd
- name: Build app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --parallel
- name: Setup Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
cache: 'pip'

- name: Create virtual environment
working-directory: ${{ env.WORKING_DIRECTORY }}
run: python -m venv openvino_sd_cpp

- name: Install python dependencies
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
. "./openvino_sd_cpp/Scripts/Activate.ps1"
python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
- name: Download and convert model and tokenizer
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
. "./openvino_sd_cpp/Scripts/Activate.ps1"
optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion models/stable_diffusion_v1_5_ov/FP16
- name: Build app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --parallel
optimum-cli export openvino --model botp/stable-diffusion-v1-5 --task stable-diffusion models/stable_diffusion_v1_5_ov/FP16
- name: Run app
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
Expand Down
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,18 @@ if(POLICY CMP0169)
endif()

project(OpenVINOGenAI
VERSION 2024.4.0.0
VERSION 2024.5.0.0
DESCRIPTION "OpenVINO GenAI"
HOMEPAGE_URL "https://github.com/openvinotoolkit/openvino.genai"
LANGUAGES CXX)

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage ${OpenVINOGenAI_VERSION} QUIET
COMPONENTS Runtime Threading
COMPONENTS Runtime
PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
find_package(OpenVINO ${OpenVINOGenAI_VERSION} REQUIRED
COMPONENTS Runtime Threading)
COMPONENTS Runtime)
endif()

include(cmake/features.cmake)
Expand All @@ -56,6 +56,10 @@ if(ENABLE_PYTHON)
endif()
endif()

if(WIN32 OR APPLE)
set(CMAKE_DEBUG_POSTFIX "d")
endif()

add_subdirectory(thirdparty)
add_subdirectory(src)
add_subdirectory(samples)
Expand Down
2 changes: 1 addition & 1 deletion image_generation/lcm_dreamshaper_v7/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")

# dependencies

find_package(OpenVINO REQUIRED COMPONENTS Runtime Threading)
find_package(OpenVINO REQUIRED COMPONENTS Runtime)

include(FetchContent)

Expand Down
2 changes: 1 addition & 1 deletion image_generation/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
-r ../samples/requirements.txt
diffusers==0.30.1
diffusers==0.30.2
2 changes: 1 addition & 1 deletion image_generation/stable_diffusion_1_5/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")

# dependencies

find_package(OpenVINO REQUIRED COMPONENTS Runtime Threading)
find_package(OpenVINO REQUIRED COMPONENTS Runtime)

include(FetchContent)

Expand Down
2 changes: 1 addition & 1 deletion image_generation/stable_diffusion_1_5/cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ The path to the OpenVINO install directory is referred as `<INSTALL_DIR>` throug
2. Download the model from Hugging Face and convert it to OpenVINO IR via the [optimum-intel CLI](https://github.com/huggingface/optimum-intel).

Example models to download:
- [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)
- [botp/stable-diffusion-v1-5](https://huggingface.co/botp/stable-diffusion-v1-5)
- [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0)

Example command for downloading [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) model and exporting it with FP16 precision:
Expand Down
33 changes: 15 additions & 18 deletions llm_bench/python/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,6 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id)
pt_inputs = tokenizer(input_text_list, return_tensors="pt")
input_token_size = pt_inputs.input_ids.shape[1]
pipe_tokenizer = model.get_tokenizer()
tok_encode_start = time.perf_counter()
input_data = pipe_tokenizer.encode(input_text_list)
tok_encode_end = time.perf_counter()
tok_encode_time = (tok_encode_end - tok_encode_start) * 1000
if args['batch_size'] > 1:
out_str = '[warm-up]' if num == 0 else '[{}]'.format(num)
out_str += " Batch_size={}, ".format(args['batch_size'])
Expand All @@ -243,21 +238,19 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
mem_consumption.start_collect_memory_consumption()
max_gen_tokens = DEFAULT_OUTPUT_TOKEN_SIZE if args['infer_count'] is None else args['infer_count']
streamer.reset()
start = time.perf_counter()
generated_tokens = model.generate(input_data, max_new_tokens=max_gen_tokens, num_beams=args["num_beams"], streamer=streamer).tokens
generation_result = model.generate(input_text_list, max_new_tokens=max_gen_tokens, num_beams=args["num_beams"])
end = time.perf_counter()
log.info(type(generated_tokens[0]))
generated_text = generation_result.texts
perf_metrics = generation_result.perf_metrics

if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
mem_consumption.end_collect_momory_consumption()
max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
mem_consumption.clear_max_memory_consumption()

generation_time = end - start
tok_decode_start = time.perf_counter()
generated_text = pipe_tokenizer.decode(generated_tokens)
tok_decode_end = time.perf_counter()
tok_decode_time = (tok_decode_end - tok_decode_start) * 1000
generated_tokens = [tokenizer(text).input_ids for text in generated_text]
# Only text_gen needs to subtract the length of input_data, because generated_text may include input_text
num_tokens = 0
result_md5_list = []
Expand All @@ -275,9 +268,13 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
else:
md5_list[num][prompt_index] = result_md5_list
per_token_time = generation_time * 1000 / (num_tokens / args['batch_size'])
tm_list = streamer.get_time_list()
tm_list = np.array(perf_metrics.raw_metrics.m_durations) / 1000 / 1000
log.debug('latency of all tokens:')
[log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_list)]
tokenization_time = (
np.mean(perf_metrics.raw_metrics.tokenization_durations) / 1000,
np.mean(perf_metrics.raw_metrics.detokenization_durations) / 1000
)
iter_data = gen_iterate_data(
num,
input_token_size * args['batch_size'],
Expand All @@ -290,19 +287,19 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
max_shared_mem=max_shared_mem_consumption,
max_uss_mem=max_uss_mem_consumption,
prompt_idx=prompt_index,
tokenization_time=(tok_encode_time, tok_decode_time)
tokenization_time=tokenization_time
)
iter_data_list.append(iter_data)
llm_bench_utils.metrics_print.print_metrics(
num,
iter_data,
tm_list,
tm_list.tolist(),
[],
warm_up=(num == 0),
max_rss_mem=max_rss_mem_consumption,
max_shared_mem=max_shared_mem_consumption,
max_uss_mem=max_uss_mem_consumption,
tokenization_time=(tok_encode_time, tok_decode_time),
tokenization_time=tokenization_time,
batch_size=args['batch_size']
)
if num > 0:
Expand All @@ -320,7 +317,6 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
assert (result_md5_list == prev_md5)
else:
llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
streamer.reset()


def run_text_generation_benchmark(model_path, framework, device, args, num_iters):
Expand Down Expand Up @@ -726,7 +722,8 @@ def get_argprser():


def main():
log.basicConfig(format='[ %(levelname)s ] %(message)s', level=os.environ.get("LOGLEVEL", log.INFO), stream=sys.stdout, encoding="utf-8")
logging_kwargs = {"encoding": "utf-8"} if sys.version_info[1] > 8 else {}
log.basicConfig(format='[ %(levelname)s ] %(message)s', level=os.environ.get("LOGLEVEL", log.INFO), stream=sys.stdout, **logging_kwargs)
args = get_argprser()
model_path, framework, model_args, model_name = llm_bench_utils.model_utils.analyze_args(args)

Expand Down
Loading

0 comments on commit c39ff0a

Please sign in to comment.