From f26da46e3b6bafb5884dafca5ffcbe7793ac1926 Mon Sep 17 00:00:00 2001 From: Alexey Suhov Date: Thu, 12 Nov 2020 19:35:17 +0300 Subject: [PATCH] Publishing 2020.3.1 LTS content (#3108) --- inference-engine/cmake/FindTBB.cmake | 32 + inference-engine/cmake/dependencies.cmake | 19 +- inference-engine/cmake/ie_parallel.cmake | 15 +- .../cmake/tbb/lnx/TBBConfig.cmake | 196 +++++ .../cmake/tbb/mac/TBBConfig.cmake | 114 +++ .../cmake/tbb/win/TBBConfig.cmake | 140 ++++ .../src/cldnn_engine/cldnn_engine.cpp | 4 +- .../src/cldnn_engine/cldnn_infer_request.cpp | 1 + .../src/gna_plugin/gna_slope_scale.h | 4 +- .../convert_function_to_cnn_network.cpp | 1 + .../ie_cnn_layer_builder_ngraph.cpp | 18 + .../ie_cnn_net_reader_impl.cpp | 135 ++++ .../src/inference_engine/ie_ir_parser.hpp | 2 + .../src/inference_engine/ie_system_conf.cpp | 4 + .../threading/ie_cpu_streams_executor.cpp | 4 + .../threading/ie_istreams_executor.cpp | 7 + .../src/legacy_api/src/graph_transformer.cpp | 3 +- .../legacy_api/src/ie_layer_validators.cpp | 47 +- inference-engine/src/mkldnn_plugin/config.cpp | 15 + inference-engine/src/mkldnn_plugin/config.h | 9 +- .../mkldnn_plugin/mkldnn_infer_request.cpp | 1 + .../src/mkldnn_plugin/mkldnn_plugin.cpp | 3 +- .../nodes/mkldnn_bin_conv_node.cpp | 6 + .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp | 7 +- .../nodes/mkldnn_deconv_node.cpp | 2 + .../nodes/mkldnn_fullyconnected_node.cpp | 2 + .../mkldnn_plugin/nodes/mkldnn_mvn_node.cpp | 2 + .../nodes/mkldnn_quantize_node.cpp | 12 +- .../nodes/mkldnn_quantize_node.h | 20 +- .../nodes/mkldnn_resample_node.cpp | 2 + .../src/mkldnn_plugin/nodes/select.cpp | 283 ++++--- .../convert_reduce_to_pooling.hpp | 22 +- .../myriad_executable_network.cpp | 15 +- .../myriad_plugin/myriad_executable_network.h | 8 +- .../src/vpu/myriad_plugin/myriad_executor.cpp | 28 +- .../src/vpu/myriad_plugin/myriad_executor.h | 5 +- .../vpu/myriad_plugin/myriad_mvnc_wraper.cpp | 22 +- .../vpu/myriad_plugin/myriad_mvnc_wraper.h | 18 +- 
.../src/vpu/myriad_plugin/myriad_plugin.cpp | 8 +- .../src/vpu/myriad_plugin/myriad_plugin.h | 2 +- .../single_layer_tests/select.cpp | 86 ++ .../single_layer_tests/select.cpp | 33 + .../include/single_layer_tests/select.hpp | 34 + .../shared/src/single_layer_tests/select.cpp | 99 +++ .../functional_test_utils/blob_utils.hpp | 8 + .../include/ngraph_functions/select.hpp | 51 ++ .../tests/ngraph_functions/src/select.cpp | 32 + .../unit/engines/vpu/mvnc/watchdog_tests.cpp | 253 ++---- .../myriad_tests/helpers/myriad_mvnc_stub.h | 2 + .../cnn_ngraph_impl_tests.cpp | 94 +++ .../thirdparty/clDNN/api/device.hpp | 8 + .../thirdparty/clDNN/src/gpu/device_info.cpp | 117 ++- .../thirdparty/clDNN/src/gpu/device_info.h | 4 +- .../thirdparty/clDNN/src/gpu/ocl_builder.cpp | 88 +- .../thirdparty/clDNN/src/gpu/ocl_builder.h | 10 +- .../thirdparty/clDNN/src/gpu/ocl_toolkit.cpp | 1 - .../graph_optimizer/prepare_buffer_fusing.cpp | 4 + .../clDNN/tests/test_cases/crop_gpu_test.cpp | 41 + .../thirdparty/mkl-dnn/include/mkldnn.h | 15 +- .../thirdparty/mkl-dnn/include/mkldnn.hpp | 45 +- .../mkl-dnn/src/common/primitive_attr.cpp | 66 +- .../mkl-dnn/src/common/primitive_attr.hpp | 32 +- .../thirdparty/movidius/mvnc/CMakeLists.txt | 4 +- .../thirdparty/movidius/mvnc/include/mvnc.h | 12 +- .../movidius/mvnc/include/ncPrivateTypes.h | 2 +- .../movidius/mvnc/include/watchdog/watchdog.h | 34 +- .../mvnc/include/watchdog/watchdogPrivate.hpp | 60 +- .../mvnc/include/watchdog/xlink_device.h | 25 + .../thirdparty/movidius/mvnc/src/mvnc_api.c | 32 +- .../movidius/mvnc/src/watchdog/watchdog.cpp | 752 +++++++----------- .../mvnc/src/watchdog/xlink_device.cpp | 173 ++++ .../extensions/front/tf/activation_ext.py | 2 +- 72 files changed, 2377 insertions(+), 1080 deletions(-) create mode 100644 inference-engine/cmake/FindTBB.cmake create mode 100644 inference-engine/cmake/tbb/lnx/TBBConfig.cmake create mode 100644 inference-engine/cmake/tbb/mac/TBBConfig.cmake create mode 100644 
inference-engine/cmake/tbb/win/TBBConfig.cmake create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/select.cpp create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/select.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/single_layer_tests/select.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/src/single_layer_tests/select.cpp create mode 100644 inference-engine/tests/ngraph_functions/include/ngraph_functions/select.hpp create mode 100644 inference-engine/tests/ngraph_functions/src/select.cpp create mode 100644 inference-engine/thirdparty/movidius/mvnc/include/watchdog/xlink_device.h create mode 100644 inference-engine/thirdparty/movidius/mvnc/src/watchdog/xlink_device.cpp diff --git a/inference-engine/cmake/FindTBB.cmake b/inference-engine/cmake/FindTBB.cmake new file mode 100644 index 00000000000000..688e6fb46dc3ca --- /dev/null +++ b/inference-engine/cmake/FindTBB.cmake @@ -0,0 +1,32 @@ +# Copyright (C) 2020 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +######################################################################## +# +# Perform search of TBB package corresponding with specified search order. +# +# TBBROOT var is set into external package path or has a default value +# with IE own version of TBB. Search order is next: +# 1) ${TBBROOT}/cmake +# 2) ${TBBROOT} with IE own version of TBBConfig.cmake (actual for TBB < 2017.7) +# + +## Path to IE own version of TBBConfig.cmake old TBB version without cmake config. 
+if(APPLE) + set(IE_OWN_TBB_CONFIG tbb/mac) +elseif(UNIX) + set(IE_OWN_TBB_CONFIG tbb/lnx) +elseif(WIN32) # CMake defines WIN32 for Windows targets; a variable named WIN is never set + set(IE_OWN_TBB_CONFIG tbb/win) +else() + unset(IE_OWN_TBB_CONFIG) +endif() + +find_package(TBB + CONFIG + NO_DEFAULT_PATH + PATHS ${TBBROOT}/cmake + ${CMAKE_CURRENT_LIST_DIR}/${IE_OWN_TBB_CONFIG} +) +find_package_handle_standard_args(TBB CONFIG_MODE) diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake index ff1b9e7a931abf..141e83f5f1f22c 100644 --- a/inference-engine/cmake/dependencies.cmake +++ b/inference-engine/cmake/dependencies.cmake @@ -77,7 +77,7 @@ endif () ## TBB package if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") - reset_deps_cache(TBBROOT TBB_DIR) + reset_deps_cache(TBBROOT) if(NOT DEFINED TBB_DIR AND NOT DEFINED ENV{TBB_DIR}) if (WIN32 AND X86_64) @@ -85,25 +85,21 @@ if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") RESOLVE_DEPENDENCY(TBB ARCHIVE_WIN "tbb2020_20200415_win.zip" TARGET_PATH "${TEMP}/tbb" - ENVIRONMENT "TBBROOT" - VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*") + ENVIRONMENT "TBBROOT") elseif(ANDROID) # Should be before LINUX due LINUX is detected as well RESOLVE_DEPENDENCY(TBB ARCHIVE_ANDROID "tbb2020_20200404_android.tgz" TARGET_PATH "${TEMP}/tbb" - ENVIRONMENT "TBBROOT" - VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*") + ENVIRONMENT "TBBROOT") elseif(LINUX AND X86_64) RESOLVE_DEPENDENCY(TBB ARCHIVE_LIN "tbb2020_20200415_lin_strip.tgz" - TARGET_PATH "${TEMP}/tbb" - ENVIRONMENT "TBBROOT") + TARGET_PATH "${TEMP}/tbb") elseif(APPLE AND X86_64) RESOLVE_DEPENDENCY(TBB ARCHIVE_MAC "tbb2020_20200404_mac.tgz" TARGET_PATH "${TEMP}/tbb" - ENVIRONMENT "TBBROOT" - VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*") + ENVIRONMENT "TBBROOT") else() message(FATAL_ERROR "TBB is not available on current platform") endif() @@ -116,12 +112,11 @@ if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") endif() update_deps_cache(TBBROOT "${TBB}" "Path to TBB 
root folder") - update_deps_cache(TBB_DIR "${TBBROOT}/cmake" "Path to TBB package folder") if (WIN32) - log_rpath_from_dir(TBB "${TBB_DIR}/../bin") + log_rpath_from_dir(TBB "${TBB}/bin") else () - log_rpath_from_dir(TBB "${TBB_DIR}/../lib") + log_rpath_from_dir(TBB "${TBB}/lib") endif () debug_message(STATUS "tbb=" ${TBB}) endif () diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake index 0f3c41e0a5c61d..ad30184cc1ee82 100644 --- a/inference-engine/cmake/ie_parallel.cmake +++ b/inference-engine/cmake/ie_parallel.cmake @@ -2,6 +2,20 @@ # SPDX-License-Identifier: Apache-2.0 # +if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") + find_package(TBB COMPONENTS tbb tbbmalloc) + if (TBB_FOUND) + if (${TBB_VERSION} VERSION_LESS 2020) + ext_message(WARNING "TBB version is less than OpenVINO recommends to use.\ + Some TBB related features like NUMA-aware tbb::task_arena\ + execution will be disabled.") + endif() + else () + ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path. 
\ + SEQ method will be used.") + endif () +endif() + function(set_ie_threading_interface_for TARGET_NAME) get_target_property(target_type ${TARGET_NAME} TYPE) if(target_type STREQUAL "INTERFACE_LIBRARY") @@ -48,7 +62,6 @@ function(set_ie_threading_interface_for TARGET_NAME) set(IE_THREAD_DEFINE "IE_THREAD_SEQ") if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") - find_package(TBB COMPONENTS tbb tbbmalloc) if (TBB_FOUND) set(IE_THREAD_DEFINE "IE_THREAD_TBB") ie_target_link_libraries(${TARGET_NAME} ${LINK_TYPE} ${TBB_IMPORTED_TARGETS}) diff --git a/inference-engine/cmake/tbb/lnx/TBBConfig.cmake b/inference-engine/cmake/tbb/lnx/TBBConfig.cmake new file mode 100644 index 00000000000000..e7dd356b39cd82 --- /dev/null +++ b/inference-engine/cmake/tbb/lnx/TBBConfig.cmake @@ -0,0 +1,196 @@ +#=============================================================================== +# Copyright 2017-2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +# TBB_FOUND should not be set explicitly. It is defined automatically by CMake. +# Handling of TBB_VERSION is in TBBConfigVersion.cmake. 
+ +if (NOT TBB_FIND_COMPONENTS) + set(TBB_FIND_COMPONENTS "tbb;tbbmalloc;tbbmalloc_proxy") + foreach (_tbb_component ${TBB_FIND_COMPONENTS}) + set(TBB_FIND_REQUIRED_${_tbb_component} 1) + endforeach() +endif() + +# Add components with internal dependencies: tbbmalloc_proxy -> tbbmalloc +list(FIND TBB_FIND_COMPONENTS tbbmalloc_proxy _tbbmalloc_proxy_ix) +if (NOT _tbbmalloc_proxy_ix EQUAL -1) + list(FIND TBB_FIND_COMPONENTS tbbmalloc _tbbmalloc_ix) + if (_tbbmalloc_ix EQUAL -1) + list(APPEND TBB_FIND_COMPONENTS tbbmalloc) + set(TBB_FIND_REQUIRED_tbbmalloc ${TBB_FIND_REQUIRED_tbbmalloc_proxy}) + endif() +endif() + +if (NOT TBBROOT) + if(DEFINED ENV{TBBROOT}) + set (TBBROOT $ENV{TBBROOT}) + endif() +endif() + +set(_tbb_root ${TBBROOT}) + +set(_tbb_x32_subdir ia32) +set(_tbb_x64_subdir intel64) + +if (CMAKE_SIZEOF_VOID_P EQUAL 8) + set(_tbb_arch_subdir ${_tbb_x64_subdir}) +else() + set(_tbb_arch_subdir ${_tbb_x32_subdir}) +endif() + +if (CMAKE_CXX_COMPILER_LOADED) + set(_tbb_compiler_id ${CMAKE_CXX_COMPILER_ID}) + set(_tbb_compiler_ver ${CMAKE_CXX_COMPILER_VERSION}) +elseif (CMAKE_C_COMPILER_LOADED) + set(_tbb_compiler_id ${CMAKE_C_COMPILER_ID}) + set(_tbb_compiler_ver ${CMAKE_C_COMPILER_VERSION}) +endif() + +# For non-GCC compilers try to find version of system GCC to choose right compiler subdirectory. 
+if (NOT _tbb_compiler_id STREQUAL "GNU") + execute_process(COMMAND gcc --version OUTPUT_VARIABLE _tbb_gcc_ver_output ERROR_QUIET) + string(REGEX REPLACE ".*gcc.*([0-9]+\\.[0-9]+)\\.[0-9]+.*" "\\1" _tbb_compiler_ver "${_tbb_gcc_ver_output}") + if (NOT _tbb_compiler_ver) + message(FATAL_ERROR "This Intel TBB package is intended to be used only environment with available 'gcc'") + endif() + unset(_tbb_gcc_ver_output) +endif() + +if (EXISTS "${_tbb_root}/lib/${_tbb_arch_subdir}") + set(_tbb_lib ${_tbb_root}/lib/${_tbb_arch_subdir}) + set(_tbb_inc ${_tbb_root}/include) + + file(GLOB _tbb_gcc_versions_available RELATIVE ${_tbb_lib} ${_tbb_lib}/*) + # shall we check _tbb_gcc_versions_available is not empty? + foreach (_tbb_gcc_version ${_tbb_gcc_versions_available}) + string(SUBSTRING ${_tbb_gcc_version} 3 -1 _tbb_gcc_version_number) + if (NOT _tbb_compiler_ver VERSION_LESS _tbb_gcc_version_number) + set(_tbb_compiler_subdir ${_tbb_gcc_version}) + endif() + endforeach() +else() + if (TBBROOT) + set(__tbb_hint_path "${TBBROOT}") + else() + set(__tbb_hint_path "/non/existing/path") + endif() + + # try to find TBB in the system + find_library(_tbb_lib NAMES tbb + HINTS "${__tbb_hint_path}" + PATH_SUFFIXES lib lib64) + find_path(_tbb_inc NAMES tbb.h + HINTS "${__tbb_hint_path}" + PATH_SUFFIXES include tbb include/tbb) + unset(__tbb_hint_path) + + if (NOT _tbb_lib OR NOT _tbb_inc) + message("FATAL_ERROR" "Cannot find TBB") + endif() + + get_filename_component(_tbb_lib "${_tbb_lib}" PATH) + get_filename_component(_tbb_inc "${_tbb_inc}" PATH) + + set(_tbb_arch_subdir "") + set(_tbb_compiler_subdir "") +endif() + +unset(_tbb_gcc_version_number) +unset(_tbb_compiler_id) +unset(_tbb_compiler_ver) + +# Now we check that all the needed component are present +get_filename_component(_tbb_lib_path "${_tbb_lib}/${_tbb_compiler_subdir}" ABSOLUTE) + +if (TBB_FOUND) + return() +endif() + +# detect version +find_file(_tbb_def_header tbb_stddef.h HINTS "${_tbb_root}/include/tbb") + +if 
(_tbb_def_header) + file(READ "${_tbb_def_header}" _tbb_def_content) + string(REGEX MATCH "TBB_VERSION_MAJOR[ ]*[0-9]*" _tbb_version_major ${_tbb_def_content}) + string(REGEX MATCH "[0-9][0-9]*" _tbb_version_major ${_tbb_version_major}) + + string(REGEX MATCH "TBB_VERSION_MINOR[ ]*[0-9]" _tbb_version_minor ${_tbb_def_content}) + string(REGEX MATCH "[0-9][0-9]*" _tbb_version_minor ${_tbb_version_minor}) + + set(TBB_VERSION "${_tbb_version_major}.${_tbb_version_minor}") +else() + set(TBB_VERSION "") +endif() + +foreach (_tbb_soversion 2 12) +foreach (_tbb_component ${TBB_FIND_COMPONENTS}) + set(_tbb_release_lib + "${_tbb_lib_path}/lib${_tbb_component}.so.${_tbb_soversion}") + set(_tbb_debug_lib + "${_tbb_lib_path}/lib${_tbb_component}_debug.so.${_tbb_soversion}") + + # oneDNN change: check library existence (BUILD_MODE related only, not both) + string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_CMAKE_BUILD_TYPE) + if (UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG") + if (EXISTS "${_tbb_debug_lib}") + set(_lib_exists TRUE) + elseif (EXISTS "${_tbb_release_lib}") + message(FATAL_ERROR + "Intel TBB release library is found here: ${_tbb_release_lib}. 
" + "But the debug library + (lib${_tbb_component}_debug.so.${_tbb_soversion}) is missing.") + endif() + else() + if (EXISTS "${_tbb_release_lib}") + set(_lib_exists TRUE) + endif() + endif() + + if (_lib_exists) + if (NOT TARGET TBB::${_tbb_component}) + add_library(TBB::${_tbb_component} SHARED IMPORTED) + set_target_properties(TBB::${_tbb_component} PROPERTIES + IMPORTED_CONFIGURATIONS "RELEASE;DEBUG" + IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}" + IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}" + INTERFACE_INCLUDE_DIRECTORIES "${_tbb_inc}") + + # Add internal dependencies for imported targets: TBB::tbbmalloc_proxy -> TBB::tbbmalloc + if (_tbb_component STREQUAL tbbmalloc_proxy) + set_target_properties(TBB::tbbmalloc_proxy PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbbmalloc) + endif() + + list(APPEND TBB_IMPORTED_TARGETS TBB::${_tbb_component}) + set(TBB_${_tbb_component}_FOUND 1) + endif() + break() + endif() +endforeach() +endforeach() + +if (NOT _lib_exists AND TBB_FIND_REQUIRED AND TBB_FIND_REQUIRED_${_tbb_component}) + message(FATAL_ERROR "Missed required Intel TBB component: ${_tbb_component}") +endif() + +unset(_tbb_x32_subdir) +unset(_tbb_x64_subdir) +unset(_tbb_arch_subdir) +unset(_tbb_compiler_subdir) +unset(_tbbmalloc_proxy_ix) +unset(_tbbmalloc_ix) +unset(_tbb_lib_path) +unset(_tbb_release_lib) +unset(_tbb_debug_lib) diff --git a/inference-engine/cmake/tbb/mac/TBBConfig.cmake b/inference-engine/cmake/tbb/mac/TBBConfig.cmake new file mode 100644 index 00000000000000..602dceff5efd86 --- /dev/null +++ b/inference-engine/cmake/tbb/mac/TBBConfig.cmake @@ -0,0 +1,114 @@ +#=============================================================================== +# Copyright 2017-2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +# TBB_FOUND should not be set explicitly. It is defined automatically by CMake. +# Handling of TBB_VERSION is in TBBConfigVersion.cmake. + +if (NOT TBB_FIND_COMPONENTS) + set(TBB_FIND_COMPONENTS "tbb;tbbmalloc;tbbmalloc_proxy") + foreach (_tbb_component ${TBB_FIND_COMPONENTS}) + set(TBB_FIND_REQUIRED_${_tbb_component} 1) + endforeach() +endif() + +# Add components with internal dependencies: tbbmalloc_proxy -> tbbmalloc +list(FIND TBB_FIND_COMPONENTS tbbmalloc_proxy _tbbmalloc_proxy_ix) +if (NOT _tbbmalloc_proxy_ix EQUAL -1) + list(FIND TBB_FIND_COMPONENTS tbbmalloc _tbbmalloc_ix) + if (_tbbmalloc_ix EQUAL -1) + list(APPEND TBB_FIND_COMPONENTS tbbmalloc) + set(TBB_FIND_REQUIRED_tbbmalloc ${TBB_FIND_REQUIRED_tbbmalloc_proxy}) + endif() +endif() + +if (NOT TBBROOT) + if(DEFINED ENV{TBBROOT}) + set (TBBROOT $ENV{TBBROOT}) + else() + message("FATAL_ERROR" "TBBROOT is unset") + endif() +endif() + +set(_tbb_root ${TBBROOT}) + +set(_tbb_x32_subdir .) +set(_tbb_x64_subdir .) + +if (CMAKE_SIZEOF_VOID_P EQUAL 8) + set(_tbb_arch_subdir ${_tbb_x64_subdir}) +else() + set(_tbb_arch_subdir ${_tbb_x32_subdir}) +endif() + +set(_tbb_compiler_subdir .) 
+ +get_filename_component(_tbb_lib_path "${_tbb_root}/lib/${_tbb_arch_subdir}/${_tbb_compiler_subdir}" ABSOLUTE) + +if (TBB_FOUND) + return() +endif() + +# detect version +find_file(_tbb_def_header tbb_stddef.h HINTS "${_tbb_root}/include/tbb") + +if (_tbb_def_header) + file(READ "${_tbb_def_header}" _tbb_def_content) + string(REGEX MATCH "TBB_VERSION_MAJOR[ ]*[0-9]*" _tbb_version_major ${_tbb_def_content}) + string(REGEX MATCH "[0-9][0-9]*" _tbb_version_major ${_tbb_version_major}) + + string(REGEX MATCH "TBB_VERSION_MINOR[ ]*[0-9]" _tbb_version_minor ${_tbb_def_content}) + string(REGEX MATCH "[0-9][0-9]*" _tbb_version_minor ${_tbb_version_minor}) + + set(TBB_VERSION "${_tbb_version_major}.${_tbb_version_minor}") +else() + set(TBB_VERSION "") +endif() + +foreach (_tbb_component ${TBB_FIND_COMPONENTS}) + set(_tbb_release_lib "${_tbb_lib_path}/lib${_tbb_component}.dylib") + set(_tbb_debug_lib "${_tbb_lib_path}/lib${_tbb_component}_debug.dylib") + + if (EXISTS "${_tbb_release_lib}" AND EXISTS "${_tbb_debug_lib}") + if (NOT TARGET TBB::${_tbb_component}) + add_library(TBB::${_tbb_component} SHARED IMPORTED) + set_target_properties(TBB::${_tbb_component} PROPERTIES + IMPORTED_CONFIGURATIONS "RELEASE;DEBUG" + IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}" + IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}" + INTERFACE_INCLUDE_DIRECTORIES "${_tbb_root}/include") + + # Add internal dependencies for imported targets: TBB::tbbmalloc_proxy -> TBB::tbbmalloc + if (_tbb_component STREQUAL tbbmalloc_proxy) + set_target_properties(TBB::tbbmalloc_proxy PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbbmalloc) + endif() + + list(APPEND TBB_IMPORTED_TARGETS TBB::${_tbb_component}) + set(TBB_${_tbb_component}_FOUND 1) + endif() + elseif (TBB_FIND_REQUIRED AND TBB_FIND_REQUIRED_${_tbb_component}) + message(FATAL_ERROR "Missed required Intel TBB component: ${_tbb_component}") + endif() +endforeach() + +unset(_tbb_x32_subdir) +unset(_tbb_x64_subdir) +unset(_tbb_arch_subdir) 
+unset(_tbb_compiler_subdir) +unset(_tbbmalloc_proxy_ix) +unset(_tbbmalloc_ix) +unset(_tbb_lib_path) +unset(_tbb_release_lib) +unset(_tbb_debug_lib) diff --git a/inference-engine/cmake/tbb/win/TBBConfig.cmake b/inference-engine/cmake/tbb/win/TBBConfig.cmake new file mode 100644 index 00000000000000..46fc3ada82a6f3 --- /dev/null +++ b/inference-engine/cmake/tbb/win/TBBConfig.cmake @@ -0,0 +1,140 @@ +#=============================================================================== +# Copyright 2017-2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +# TBB_FOUND should not be set explicitly. It is defined automatically by CMake. +# Handling of TBB_VERSION is in TBBConfigVersion.cmake. 
+ +if (NOT TBB_FIND_COMPONENTS) + set(TBB_FIND_COMPONENTS "tbb;tbbmalloc;tbbmalloc_proxy") + foreach (_tbb_component ${TBB_FIND_COMPONENTS}) + set(TBB_FIND_REQUIRED_${_tbb_component} 1) + endforeach() +endif() + +# Add components with internal dependencies: tbbmalloc_proxy -> tbbmalloc +list(FIND TBB_FIND_COMPONENTS tbbmalloc_proxy _tbbmalloc_proxy_ix) +if (NOT _tbbmalloc_proxy_ix EQUAL -1) + list(FIND TBB_FIND_COMPONENTS tbbmalloc _tbbmalloc_ix) + if (_tbbmalloc_ix EQUAL -1) + list(APPEND TBB_FIND_COMPONENTS tbbmalloc) + set(TBB_FIND_REQUIRED_tbbmalloc ${TBB_FIND_REQUIRED_tbbmalloc_proxy}) + endif() +endif() + +if (NOT TBBROOT) + if(DEFINED ENV{TBBROOT}) + set (TBBROOT $ENV{TBBROOT}) + else() + message("FATAL_ERROR" "TBBROOT is unset") + endif() +endif() + +set(_tbb_root ${TBBROOT}) + +set(_tbb_x32_subdir ia32) +set(_tbb_x64_subdir intel64) + +if (CMAKE_SIZEOF_VOID_P EQUAL 8) + set(_tbb_arch_subdir ${_tbb_x64_subdir}) +else() + set(_tbb_arch_subdir ${_tbb_x32_subdir}) +endif() + +if (NOT MSVC) + message(FATAL_ERROR "This Intel TBB package is intended to be used only in the project with MSVC") +endif() + +# Detect the most relevant MSVC subdirectory +set(_tbb_msvc_1700_subdir vc11) +set(_tbb_msvc_1800_subdir vc12) +set(_tbb_msvc_1900_subdir vc14) +set(_tbb_msvc_ver ${MSVC_VERSION}) +if (MSVC_VERSION VERSION_LESS 1700) + message(FATAL_ERROR "This Intel TBB package is intended to be used only in the project with MSVC version 1700 (vc11) or higher") +elseif (MSVC_VERSION VERSION_GREATER 1900) + set(_tbb_msvc_ver 1900) +endif() +set(_tbb_compiler_subdir ${_tbb_msvc_${_tbb_msvc_ver}_subdir}) +unset(_tbb_msvc_1700_subdir) +unset(_tbb_msvc_1800_subdir) +unset(_tbb_msvc_1900_subdir) + +if (WINDOWS_STORE) + set(_tbb_compiler_subdir ${_tbb_compiler_subdir}_ui) +endif() + +#set conveniance variable to locate TBB files (these are used for a PSXE install) +get_filename_component(_tbb_lib_path "${_tbb_root}/lib/${_tbb_arch_subdir}/${_tbb_compiler_subdir}" ABSOLUTE) 
+get_filename_component(_tbb_inc_path "${_tbb_root}/include/" ABSOLUTE) + +if (TBB_FOUND) + return() +endif() + +# detect version +find_file(_tbb_def_header tbb_stddef.h HINTS "${_tbb_root}/include/tbb") + +if (_tbb_def_header) + file(READ "${_tbb_def_header}" _tbb_def_content) + string(REGEX MATCH "TBB_VERSION_MAJOR[ ]*[0-9]*" _tbb_version_major ${_tbb_def_content}) + string(REGEX MATCH "[0-9][0-9]*" _tbb_version_major ${_tbb_version_major}) + + string(REGEX MATCH "TBB_VERSION_MINOR[ ]*[0-9]" _tbb_version_minor ${_tbb_def_content}) + string(REGEX MATCH "[0-9][0-9]*" _tbb_version_minor ${_tbb_version_minor}) + + set(TBB_VERSION "${_tbb_version_major}.${_tbb_version_minor}") +else() + set(TBB_VERSION "") +endif() + +foreach (_tbb_component ${TBB_FIND_COMPONENTS}) + set(_tbb_release_lib "${_tbb_lib_path}/${_tbb_component}.lib") + set(_tbb_debug_lib "${_tbb_lib_path}/${_tbb_component}_debug.lib") + + if (EXISTS "${_tbb_release_lib}" AND EXISTS "${_tbb_debug_lib}") + if (NOT TARGET TBB::${_tbb_component}) + add_library(TBB::${_tbb_component} SHARED IMPORTED) + set_target_properties(TBB::${_tbb_component} PROPERTIES + IMPORTED_CONFIGURATIONS "RELEASE;DEBUG" + IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}" + IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}" + INTERFACE_INCLUDE_DIRECTORIES "${_tbb_inc_path}" + IMPORTED_IMPLIB_RELEASE "${_tbb_release_lib}" + IMPORTED_IMPLIB_DEBUG "${_tbb_debug_lib}" + INTERFACE_COMPILE_DEFINITIONS "__TBB_NO_IMPLICIT_LINKAGE=1") + + # Add internal dependencies for imported targets: TBB::tbbmalloc_proxy -> TBB::tbbmalloc + if (_tbb_component STREQUAL tbbmalloc_proxy) + set_target_properties(TBB::tbbmalloc_proxy PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbbmalloc) + endif() + + list(APPEND TBB_IMPORTED_TARGETS TBB::${_tbb_component}) + set(TBB_${_tbb_component}_FOUND 1) + endif() + elseif (TBB_FIND_REQUIRED AND TBB_FIND_REQUIRED_${_tbb_component}) + message(FATAL_ERROR "Missed required Intel TBB component: ${_tbb_component}") + endif() 
+endforeach() + +unset(_tbb_x32_subdir) +unset(_tbb_x64_subdir) +unset(_tbb_arch_subdir) +unset(_tbb_compiler_subdir) +unset(_tbbmalloc_proxy_ix) +unset(_tbbmalloc_ix) +unset(_tbb_lib_path) +unset(_tbb_release_lib) +unset(_tbb_debug_lib) diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 4161630a705152..f522dd9800dff3 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -331,7 +331,9 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map configKeys; for (auto opt : _impl->m_config.key_config_map) diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp index 0f217dbd35fcda..08aee218cdfb8d 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp @@ -250,6 +250,7 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr network, case Precision::BOOL: { uint8_t* blob_ptr = const_cast(locked.as()) + offset; network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + break; } default: THROW_IE_EXCEPTION << "The plugin does not support input " << inputBlob.getTensorDesc().getPrecision() << " precision"; diff --git a/inference-engine/src/gna_plugin/gna_slope_scale.h b/inference-engine/src/gna_plugin/gna_slope_scale.h index a4b9f0eef7d67b..967bdeb393dea3 100644 --- a/inference-engine/src/gna_plugin/gna_slope_scale.h +++ b/inference-engine/src/gna_plugin/gna_slope_scale.h @@ -7,9 +7,9 @@ #include typedef struct { - double slope; + double slope {}; uint64_t slope_scale = 0; - uint32_t slope_scale_index; + uint32_t slope_scale_index {}; } pwl_gna_slope_scale_t; pwl_gna_slope_scale_t gna_slope(const double slope, const double in_scale, const double out_scale); diff --git 
a/inference-engine/src/inference_engine/convert_function_to_cnn_network.cpp b/inference-engine/src/inference_engine/convert_function_to_cnn_network.cpp index 0d41c87f5bade1..36a89979b2bdd7 100644 --- a/inference-engine/src/inference_engine/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/inference_engine/convert_function_to_cnn_network.cpp @@ -138,6 +138,7 @@ std::shared_ptr convertFunctionToICNNNetwork(const std::shared_p std::make_shared>(), std::make_shared>(), std::make_shared>(), + std::make_shared>(), std::make_shared>(), std::make_shared>(), std::make_shared>(), diff --git a/inference-engine/src/inference_engine/ie_cnn_layer_builder_ngraph.cpp b/inference-engine/src/inference_engine/ie_cnn_layer_builder_ngraph.cpp index 4b8606cb2a92ea..253bacbcd4860e 100644 --- a/inference-engine/src/inference_engine/ie_cnn_layer_builder_ngraph.cpp +++ b/inference-engine/src/inference_engine/ie_cnn_layer_builder_ngraph.cpp @@ -1367,6 +1367,24 @@ CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr< return res; } +template <> +CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const { + LayerParams params = {layer->get_friendly_name(), "Select", details::ngraph::convertPrecision(layer->get_output_element_type(0))}; + + auto res = std::make_shared(params); + auto castedLayer = ngraph::as_type_ptr(layer); + if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name; + + auto broadcast = castedLayer->get_auto_broadcast().m_type; + if (broadcast == ngraph::op::AutoBroadcastType::NUMPY) { + res->params["auto_broadcast"] = "numpy"; + } else if (broadcast == ngraph::op::AutoBroadcastType::NONE) { + res->params["auto_broadcast"] = "none"; + } + + return res; +} + template <> CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const { LayerParams params = {layer->get_friendly_name(), "Eltwise", diff --git a/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp 
b/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp index 1c34f888b7baff..6570363621b210 100644 --- a/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp +++ b/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp @@ -19,16 +19,152 @@ #include "ie_ir_reader.hpp" #include "ie_profiling.hpp" #include "parsers.h" +#include "blob_factory.hpp" +#include "debug.h" #include "xml_parse_utils.h" using namespace std; using namespace InferenceEngine; using namespace InferenceEngine::details; +using namespace XMLParseUtils; IE_SUPPRESS_DEPRECATED_START CNNNetReaderImpl::CNNNetReaderImpl(const FormatParserCreator::Ptr& _creator) : parseSuccess(false), _version(0), parserCreator(_creator) {} +#if defined(ENABLE_IR_READER) +static void parsePreProcess(std::shared_ptr& network, const pugi::xml_node& root, const TBlob::Ptr& weights) { + /* + + + // in case of array – ref to the .bin file + + + */ + + auto ppNode = root.child("pre-process"); + if (ppNode.empty()) { + return; + } + // find out to what input this belongs to + std::string inputName; + InputInfo::Ptr preProcessInput; + + inputName = GetStrAttr(ppNode, "reference-layer-name", ""); + inputName = details::trim(inputName); + InputsDataMap inputs; + network->getInputsInfo(inputs); + if (inputName.empty()) { + // fallback (old format), look for the picture in the inputs + if (inputs.empty()) THROW_IE_EXCEPTION << "network has no input"; + + for (auto i : inputs) { + if (i.second->getTensorDesc().getDims().size() == 4) { + preProcessInput = i.second; + break; + } + } + if (!preProcessInput) { + preProcessInput = inputs.begin()->second; + } + + inputName = preProcessInput->name(); + } else { + const auto inputIt = inputs.find(inputName); // at() would throw std::out_of_range before the check below could report the bad reference + if (inputIt == inputs.end() || !inputIt->second) + THROW_IE_EXCEPTION << "pre-process name ref '" << inputName << "' refers to un-existing input"; + preProcessInput = inputIt->second; + } + + // dims vector without batch size + SizeVector inputDims = preProcessInput->getTensorDesc().getDims(); + size_t noOfChannels = 0, 
width = 0, height = 0; + + if (inputDims.size() < 2) { + THROW_IE_EXCEPTION << "network did not define input dimensions properly"; + } else if (inputDims.size() == 2) { // NC + noOfChannels = inputDims[1]; + width = inputDims[1]; + height = inputDims[0]; + } else if (inputDims.size() == 3) { + width = inputDims[2]; + height = inputDims[1]; + noOfChannels = inputDims[0]; + } else if (inputDims.size() == 4) { + width = inputDims[3]; + height = inputDims[2]; + noOfChannels = inputDims[1]; + } else if (inputDims.size() == 5) { + width = inputDims[4]; + height = inputDims[3]; + noOfChannels = inputDims[2]; + } + + PreProcessInfo& pp = preProcessInput->getPreProcess(); + pp.init(noOfChannels); + + auto meanSegmentPrecision = GetPrecisionAttr(ppNode, "mean-precision", Precision::UNSPECIFIED); + if (!meanSegmentPrecision || meanSegmentPrecision == Precision::MIXED) + THROW_IE_EXCEPTION << "mean blob defined without specifying precision."; + + ResponseDesc resp; + InferenceEngine::PreProcessChannel::Ptr preProcessChannel; + + int lastChanNo = -1; + std::unordered_set idsForMeanImage; + + FOREACH_CHILD(chan, ppNode, "channel") { + int chanNo = GetIntAttr(chan, "id", lastChanNo + 1); + if (chanNo >= static_cast(noOfChannels) || chanNo < 0) { + THROW_IE_EXCEPTION << "Pre-process channel id invalid: " << chanNo; + } + lastChanNo = chanNo; + preProcessChannel = pp[chanNo]; + + auto meanNode = chan.child("mean"); + if (!meanNode.empty()) { + if (!meanNode.attribute("size")) { + THROW_IE_EXCEPTION << "mean should have the attribute: size"; + } + if (meanNode.attribute("size")) { + idsForMeanImage.insert(chanNo); + size_t size = static_cast(GetIntAttr(meanNode, "size")); + size_t offset = static_cast(GetIntAttr(meanNode, "offset")); + if (width * height * meanSegmentPrecision.size() != size) { + THROW_IE_EXCEPTION << "mean blob size mismatch expected input, got: " << size + << " extpecting " << width << " x " << height << " x " + << meanSegmentPrecision.size(); + } + 
if (offset > weights->byteSize() || size > weights->byteSize() - offset) { // offset comes from the IR XML: never read past the end of the weights blob + THROW_IE_EXCEPTION << "mean blob is out of weights bounds: offset " << offset << ", size " << size << ", weights size " << weights->byteSize(); + } + preProcessChannel->meanData = make_blob_with_precision(TensorDesc(meanSegmentPrecision, {height, width}, Layout::HW)); + preProcessChannel->meanData->allocate(); + auto lockedMem = preProcessChannel->meanData->buffer(); + char* data = lockedMem.as(); + auto weightsLocked = weights->cbuffer(); + const char* origData = weightsLocked.as(); + memcpy(data, origData + offset, size); + } + } + } + + if (idsForMeanImage.size() == noOfChannels) { + pp.setVariant(MEAN_IMAGE); + } else if (idsForMeanImage.size() == 0) { + pp.setVariant(NONE); + } else { + std::string validMeanImageIds = ""; + for (auto id : idsForMeanImage) { + validMeanImageIds += std::to_string(id) + " "; + } + THROW_IE_EXCEPTION << "mean is not provided for all channels\n" + "Provided mean image for: " + << validMeanImageIds; + } + } +#endif + StatusCode CNNNetReaderImpl::SetWeights(const TBlob::Ptr& weights, ResponseDesc* desc) noexcept { if (!_parser && _version < 10) { return DescriptionBuffer(desc) << "network must be read first"; @@ -41,6 +177,9 @@ StatusCode CNNNetReaderImpl::SetWeights(const TBlob::Ptr& weights, Resp std::stringstream model; xmlDoc->save(model); network = std::make_shared(v10Reader.read(model.str(), weights)); + pugi::xml_node root = xmlDoc->document_element(); + + parsePreProcess(network, root, weights); #else return DescriptionBuffer(desc) << "Please, recompile Inference Engine with the ENABLE_IR_READER=ON Cmake option"; #endif diff --git a/inference-engine/src/inference_engine/ie_ir_parser.hpp b/inference-engine/src/inference_engine/ie_ir_parser.hpp index c7cfe5ca513015..f6d1937010bbf7 100644 --- a/inference-engine/src/inference_engine/ie_ir_parser.hpp +++ b/inference-engine/src/inference_engine/ie_ir_parser.hpp @@ -22,6 +22,7 @@ #include #include +#include #include "cnn_network_impl.hpp" #include "ie_ngraph_utils.hpp" @@ -161,6 +162,7 @@ class V10Parser : public IParser { const Blob::CPtr& weights, const GenericLayerParams& params); GenericLayerParams parseGenericParams(const 
pugi::xml_node& node); + void parsePreProcess(CNNNetwork& network, const pugi::xml_node& root, std::istream& binStream); std::map portsToData; std::map layersParseInfo; diff --git a/inference-engine/src/inference_engine/ie_system_conf.cpp b/inference-engine/src/inference_engine/ie_system_conf.cpp index f7aed759bf56ef..af9ad709af43bd 100644 --- a/inference-engine/src/inference_engine/ie_system_conf.cpp +++ b/inference-engine/src/inference_engine/ie_system_conf.cpp @@ -124,7 +124,11 @@ std::vector getAvailableNUMANodes() { return {0}; } #if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO)) std::vector getAvailableNUMANodes() { +#if TBB_INTERFACE_VERSION >= 11100 return tbb::info::numa_nodes(); +#else + return {0}; +#endif } #endif diff --git a/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp b/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp index d4d4b9448b946a..ab48e7fd0871a8 100644 --- a/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp +++ b/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp @@ -119,7 +119,11 @@ CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) : #if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO auto concurrency = (0 == config._threadsPerStream) ? 
tbb::task_arena::automatic : config._threadsPerStream; if (ThreadBindingType::NUMA == config._threadBindingType) { +#if TBB_INTERFACE_VERSION >= 11100 // TBB has numa aware task_arena api stream._taskArena.reset(new tbb::task_arena(tbb::task_arena::constraints(stream._numaNodeId, concurrency))); +#else + stream._taskArena.reset(new tbb::task_arena(concurrency)); +#endif } else if ((0 != config._threadsPerStream) || ThreadBindingType::CORES == config._threadBindingType) { stream._taskArena.reset(new tbb::task_arena(concurrency)); if (ThreadBindingType::CORES == config._threadBindingType) { diff --git a/inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp b/inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp index c2ca71e25e4660..c4727471eb0dfa 100644 --- a/inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp +++ b/inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp @@ -30,6 +30,13 @@ std::vector IStreamsExecutor::Config::SupportedKeys() { void IStreamsExecutor::Config::SetConfig(const std::string& key, const std::string& value) { if (key == CONFIG_KEY(CPU_BIND_THREAD)) { if (value == CONFIG_VALUE(YES) || value == CONFIG_VALUE(NUMA)) { +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) && (TBB_INTERFACE_VERSION < 11100) + if (value == CONFIG_VALUE(NUMA)) + THROW_IE_EXCEPTION << CONFIG_KEY(CPU_BIND_THREAD) << " property value was set to NUMA. But IE was built with " + << "TBB version without NUMA-aware API. 
Current TBB API version is " << TBB_INTERFACE_VERSION + << ", required API version 11100 or greater."; +#endif + #if (defined(__APPLE__) || defined(_WIN32)) // on the Windows and Apple the CORES and NUMA pinning options are the same _threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; diff --git a/inference-engine/src/legacy_api/src/graph_transformer.cpp b/inference-engine/src/legacy_api/src/graph_transformer.cpp index 3b0be37731c037..d2c691986b1978 100644 --- a/inference-engine/src/legacy_api/src/graph_transformer.cpp +++ b/inference-engine/src/legacy_api/src/graph_transformer.cpp @@ -221,7 +221,8 @@ const std::map ConstTransformer::getConstLayers(const std::ve // Layers with "Shape" and "Const" type are Const by definition if (layer->type == "Shape" || layer->type == "Const") { mapConstLayers[layer->name] = false; - } else if ((layer->type != "FakeQuantize") && (layer->type != "Quantize") && (!isForFakeQuantzie(*layer))) { + } else if ((layer->type != "FakeQuantize") && (layer->type != "Quantize") && + (layer->type != "Convolution") && (layer->type != "CumSum") && (!isForFakeQuantzie(*layer))) { bool isAllInputsConst = true; for (auto const& data : layer->insData) { auto creator = data.lock()->getCreatorLayer().lock(); diff --git a/inference-engine/src/legacy_api/src/ie_layer_validators.cpp b/inference-engine/src/legacy_api/src/ie_layer_validators.cpp index ccf9f74676cd44..9b4ce0218c491b 100644 --- a/inference-engine/src/legacy_api/src/ie_layer_validators.cpp +++ b/inference-engine/src/legacy_api/src/ie_layer_validators.cpp @@ -2680,40 +2680,33 @@ void NormalizeValidator::checkParams(const CNNLayer* layer) { SelectValidator::SelectValidator(const std::string& _type): LayerValidator(_type) {} void SelectValidator::checkShapes(const CNNLayer* layer, const std::vector& inShapes) const { - enum { condition, then_, else_, numOfInputs }; - auto casted = dynamic_cast(layer); - if (!casted) { - THROW_IE_EXCEPTION << layer->name << " Layer is not instance of 
SelectLayer class"; - } + enum { CONDITION, THEN, ELSE, numOfInputs }; size_t numInputs = inShapes.size(); - if (numOfInputs != numInputs) THROW_IE_EXCEPTION << " Select can take 3 inputs, but actually it has: " << numInputs; + if (numOfInputs != numInputs) THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' take 3 inputs, but actually it has: " << numInputs; - if (inShapes[then_] != inShapes[else_]) { - THROW_IE_EXCEPTION << " Positive input shape should be the same as negative input shape"; - } + size_t new_rank = inShapes[ELSE].size(); + new_rank = std::max(new_rank, inShapes[THEN].size()); - if (inShapes[condition].size() > inShapes[then_].size()) { - THROW_IE_EXCEPTION << " Condition input dimensions count (" << inShapes[condition].size() - << ") should be less or equel then" - << " posititve input dimension count (" << inShapes[then_].size() << ")"; - } + if (inShapes[CONDITION].size() > new_rank) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' has 'Mask' input's rank more than broadcasted 'Then' and 'Else' inputs' ranks"; - if (inShapes[condition].size() > inShapes[else_].size()) { - THROW_IE_EXCEPTION << " Condition input dimensions count (" << inShapes[condition].size() - << ") should be less or equel then" - << " negative input dimension count (" << inShapes[else_].size() << ")"; - } + for (size_t i = 0; i < new_rank; i++) { + auto in1 = i < (new_rank - inShapes[THEN].size()) ? 1 : inShapes[THEN][i - (new_rank - inShapes[THEN].size())]; + auto in2 = i < (new_rank - inShapes[ELSE].size()) ? 
1 : inShapes[ELSE][i - (new_rank - inShapes[ELSE].size())]; - for (std::size_t i = 0; i < inShapes[condition].size(); ++i) { - const auto& cond_dim = inShapes[condition][inShapes[condition].size() - 1 - i]; - const auto& then_dim = inShapes[then_][inShapes[then_].size() - 1 - i]; + size_t tmp = 0; + if (in1 == in2 || in1 == 1 || in2 == 1) + tmp = std::max(in1, in2); + else + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' has incompatible 'Then' and 'Else' inputs' shapes"; - if (cond_dim != then_dim && cond_dim != 1) { - THROW_IE_EXCEPTION << " Condition input dimension " << (inShapes[condition].size() - 1 - i) << " (" - << cond_dim << ") should be less or equel then posititve and negative" - << " input dimension " << (inShapes[then_].size() - 1 - i) << " (" << then_dim << ")"; - } + auto in0 = i < (new_rank - inShapes[CONDITION].size()) ? 1 : inShapes[CONDITION][i - (new_rank - inShapes[CONDITION].size())]; + if (tmp == in0 || in0 == 1) + tmp = std::max(tmp, in0); + else + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name + << "' has incompatible 'Mask' input's shapes and broadcasted 'Then' and 'Else' inputs' shapes"; } } diff --git a/inference-engine/src/mkldnn_plugin/config.cpp b/inference-engine/src/mkldnn_plugin/config.cpp index 694b6d07ab2c20..091c7fb72602ae 100644 --- a/inference-engine/src/mkldnn_plugin/config.cpp +++ b/inference-engine/src/mkldnn_plugin/config.cpp @@ -23,6 +23,21 @@ namespace MKLDNNPlugin { using namespace InferenceEngine; +Config::Config() { +#if (defined(__APPLE__) || defined(_WIN32)) +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) && (TBB_INTERFACE_VERSION >= 11100) + // If we sure that TBB has NUMA aware API part. 
+ streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA; +#else + streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NONE; +#endif +#else + streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::CORES; +#endif + + updateProperties(); +} + void Config::readProperties(const std::map &prop) { auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys(); diff --git a/inference-engine/src/mkldnn_plugin/config.h b/inference-engine/src/mkldnn_plugin/config.h index 0007bc5a198180..f61c310e52a37d 100644 --- a/inference-engine/src/mkldnn_plugin/config.h +++ b/inference-engine/src/mkldnn_plugin/config.h @@ -11,14 +11,7 @@ namespace MKLDNNPlugin { struct Config { - Config() { -#if (defined(__APPLE__) || defined(_WIN32)) - streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA; -#else - streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::CORES; -#endif - updateProperties(); - } + Config(); enum LPTransformsMode { Off, diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 91daf28bcc63b3..f450ea1152df2f 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -120,6 +120,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::InferImpl() { } break; case InferenceEngine::Precision::U8: + case InferenceEngine::Precision::BOOL: if (graph->hasMeanImageFor(input.first)) { // If a mean image exists, we convert the blob and send FP32 iconv = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 27ec34b8d876d5..376519562d1011 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -69,7 
+69,8 @@ Engine::LoadExeNetworkImpl(const ICore * /*core*/, const InferenceEngine::ICNNNe input_precision != InferenceEngine::Precision::U16 && input_precision != InferenceEngine::Precision::I16 && input_precision != InferenceEngine::Precision::I8 && - input_precision != InferenceEngine::Precision::U8) { + input_precision != InferenceEngine::Precision::U8 && + input_precision != InferenceEngine::Precision::BOOL) { THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "Input image format " << input_precision << " is not supported yet..."; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 10a74f3640b7bb..a2bb2e4bc22b38 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -184,6 +184,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx]->FillZero(); PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_weights->buffer(), @@ -201,6 +202,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_biases->buffer(), depthwiseLayer->_biases->size() * @@ -243,6 +245,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); 
PostOpsIntBlobMemory[blob_idx]->Create(binarizationDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx]->FillZero(); PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, quantizeNode->getBinarizationTresholdsPtr(), @@ -251,6 +254,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx+1]->Create(binarizationDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx+1]->FillZero(); PostOpsIntBlobMemory[blob_idx+1]->SetData(memory::data_type::f32, memory::x, quantizeNode->getBinarizationOutputMaskPtr(), @@ -282,6 +286,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool MKLDNNDims dwWeightsDims( {dw_conv_oc, (ptrdiff_t) 1, (ptrdiff_t) 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, memory::data_type::f32, w_fmt); + PostOpsIntBlobMemory[blob_idx]->FillZero(); PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::goihw, convLayer->_weights->buffer(), @@ -293,6 +298,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool MKLDNNDims dwBiasesDims({dw_conv_oc}); PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, convLayer->_biases->buffer(), dwBiasesDims.size() * diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index 4520d70da207a4..f630a511aecdaa 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -369,7 +369,7 @@ void 
MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); - + PostOpsIntBlobMemory[blob_idx]->FillZero(); PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_weights->buffer(), depthwiseLayer->_weights->size() * @@ -386,6 +386,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_biases->buffer(), depthwiseLayer->_biases->size() * @@ -438,6 +439,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, weightsPrc, memory::format::Goihw8g); + PostOpsIntBlobMemory[blob_idx]->FillZero(); Blob::Ptr weights = convLayer->blobs.find("weights")->second; Blob::Ptr biases = convLayer->blobs.find("biases")->second; @@ -448,6 +450,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); MKLDNNDims dwBiasesDims({dw_conv_oc}); PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, biasPrc, memory::format::x); + PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::x, biases->buffer(), dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc)); 
ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS], @@ -498,11 +501,13 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx]->Create(oScaleDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx]->FillZero(); PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, &oScaleDataVector[0], oScaleDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx + 1]->Create(oScaleDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, &oShiftDataVector[0], oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 350d717e0afb92..438b66bf19202a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -119,6 +119,7 @@ void MKLDNNDeconvolutionNode::setBiasAsPostOp() { PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[0]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[0]->FillZero(); std::vector weights(biases->size()); for (int i = 0; i < biases->size(); i++) { weights[i] = 1; @@ -128,6 +129,7 @@ void MKLDNNDeconvolutionNode::setBiasAsPostOp() { PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[1]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + 
PostOpsIntBlobMemory[1]->FillZero(); PostOpsIntBlobMemory[1]->SetData(memory::data_type::f32, memory::x, biases->buffer(), biases->size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index 4bc22de3647ca8..e19d98d42338c1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -199,6 +199,7 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx]->FillZero(); PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_weights->buffer(), @@ -216,6 +217,7 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_biases->buffer(), depthwiseLayer->_biases->size() * diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index 11224c89a5ff35..025656d78fb6e1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -605,6 +605,7 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); 
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx]->FillZero(); PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_weights->buffer(), @@ -622,6 +623,7 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_biases->buffer(), depthwiseLayer->_biases->size() * diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp index 5c222a078473c5..2306865bea44a9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp @@ -468,7 +468,17 @@ void MKLDNNQuantizeNode::execute(mkldnn::stream strm) { } void MKLDNNQuantizeNode::appendPostOps(mkldnn::post_ops& ops) { - ops.append_quantization(quantizeAlgorithm , cropLow, cropHigh, inputScale, inputShift, outputScale, outputShift); + if (!isPostOpDataInitialized) { + isPostOpDataInitialized = true; + cropLowData.set(cropLow.size(), 1 << 1, &cropLow[0]); + cropHighData.set(cropHigh.size(), 1 << 1, &cropHigh[0]); + inputScaleData.set(inputScale.size(), 1 << 1, &inputScale[0]); + inputShiftData.set(inputShift.size(), 1 << 1, &inputShift[0]); + outputScaleData.set(outputScale.size(), 1 << 1, &outputScale[0]); + outputShiftData.set(outputShift.size(), 1 << 1, &outputShift[0]); + } + + ops.append_quantization(quantizeAlgorithm, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData); } bool MKLDNNQuantizeNode::created() const { diff --git 
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h index 7e8b94d148fc72..e51717c3fdf6db 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace MKLDNNPlugin { @@ -39,10 +40,12 @@ class MKLDNNQuantizeNode : public MKLDNNNode { const std::vector& getInputScale() const { return inputScale; } const std::vector& getInputShift() const { return inputShift; } - void setCropLow(std::vector newCropLow) { cropLow = std::move(newCropLow); } - void setCropHigh(std::vector newCropHigh) { cropHigh = std::move(newCropHigh); } - void setInputScale(std::vector newInputScale) { inputScale = std::move(newInputScale); } - void setInputShift(std::vector newInputShift) { inputShift = std::move(newInputShift); } + void setCropLow(std::vector newCropLow) { cropLow = std::move(newCropLow); isPostOpDataInitialized = false; } + void setCropHigh(std::vector newCropHigh) { cropHigh = std::move(newCropHigh); isPostOpDataInitialized = false; } + void setInputScale(std::vector newInputScale) { inputScale = std::move(newInputScale); isPostOpDataInitialized = false; } + void setInputShift(std::vector newInputShift) { inputShift = std::move(newInputShift); isPostOpDataInitialized = false; } + void setOutputScale(std::vector newOutputScale) { outputScale = std::move(newOutputScale); isPostOpDataInitialized = false;} + void setOutputShift(std::vector newOutputShift) { outputShift = std::move(newOutputShift); isPostOpDataInitialized = false; } InferenceEngine::Precision getInputPrecision() const { return inputPrecision; } InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; } @@ -65,6 +68,15 @@ class MKLDNNQuantizeNode : public MKLDNNNode { std::vector outputScale; std::vector outputShift; + // mkldnn style post ops data representation + bool 
isPostOpDataInitialized = false; + mkldnn::impl::shifts_t cropLowData; + mkldnn::impl::shifts_t cropHighData; + mkldnn::impl::scales_t inputScaleData; + mkldnn::impl::shifts_t inputShiftData; + mkldnn::impl::scales_t outputScaleData; + mkldnn::impl::shifts_t outputShiftData; + bool isInputLowBroadcasted = false; bool isInputHighBroadcasted = false; bool isOutputLowBroadcasted = false; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_resample_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_resample_node.cpp index 764aae805c47b0..bc095ec02f5b2f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_resample_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_resample_node.cpp @@ -435,6 +435,7 @@ void MKLDNNResampleNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeigh PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx]->FillZero(); PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_weights->buffer(), @@ -452,6 +453,7 @@ void MKLDNNResampleNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeigh PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format::x); + PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, depthwiseLayer->_biases->buffer(), depthwiseLayer->_biases->size() * diff --git a/inference-engine/src/mkldnn_plugin/nodes/select.cpp b/inference-engine/src/mkldnn_plugin/nodes/select.cpp index 119fc4cd6c94c3..35c606b5102af8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/select.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/select.cpp @@ -5,11 +5,8 @@ #include "list.hpp" #include "base.hpp" -#include #include 
#include -#include -#include #include "ie_parallel.hpp" namespace InferenceEngine { @@ -17,132 +14,212 @@ namespace Extensions { namespace Cpu { class SelectImpl: public ExtLayerBase { - enum {condition, then_, else_, numOfInputs}; + enum { CONDITION, THEN, ELSE, numOfInputs }; + enum { N, C, D, H, W, numOfDims }; + + std::string broadcast; + std::vector resDims; + std::vector resOffset; + std::vector condOffset; + std::vector thenOffset; + std::vector elseOffset; public: explicit SelectImpl(const CNNLayer* layer) { try { - if (numOfInputs != layer->insData.size() || 1 != layer->outData.size()) { - THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; - } + if (layer->insData.size() != numOfInputs || layer->outData.size() != 1) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' has incorrect number of input/output edges!"; + + broadcast = layer->GetParamAsString("auto_broadcast", "numpy"); + + if (layer->insData[THEN].lock()->getTensorDesc().getPrecision() != layer->insData[ELSE].lock()->getTensorDesc().getPrecision()) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' has different precisions on 'Then' and 'Else' inputs"; + + const auto& conditionPrecision = layer->insData[CONDITION].lock()->getTensorDesc().getPrecision(); + if (conditionPrecision != Precision::BOOL && conditionPrecision != Precision::I32 && conditionPrecision != Precision::U8) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' has unsupported precision: " << conditionPrecision + << " on 'Condition' input"; + + const auto& inputPrecisionSize = layer->insData[THEN].lock()->getTensorDesc().getPrecision().size(); + if (inputPrecisionSize != 1 && inputPrecisionSize != 2 && inputPrecisionSize != 4 && inputPrecisionSize != 8) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' has unsupported precision: " << + layer->insData[THEN].lock()->getTensorDesc().getPrecision() << " on 'Then' and 'Else' 
inputs"; + + const auto &conditionShapes = layer->insData[CONDITION].lock()->getTensorDesc().getDims(); + const auto &thenShapes = layer->insData[THEN].lock()->getTensorDesc().getDims(); + const auto &elseShapes = layer->insData[ELSE].lock()->getTensorDesc().getDims(); + const auto &outputShapes = layer->outData[0]->getTensorDesc().getDims(); + + if (broadcast != "none" && broadcast != "numpy") + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' has unsupported broadcast type: " << broadcast; - auto conditionPrecision = layer->insData[condition].lock()->getTensorDesc().getPrecision(); + if (broadcast == "none" && ((conditionShapes != outputShapes) || (thenShapes != outputShapes) || (elseShapes != outputShapes))) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' and auto_broadcast='none' has input shapes mismatch"; - if (Precision::I32 != conditionPrecision - && Precision::FP32 != conditionPrecision - && Precision::U8 != conditionPrecision) { - THROW_IE_EXCEPTION << layer->name << " Incorrect condition tensor precision: " << conditionPrecision - << ". 
Should be I32, U8 or FP32"; + if (broadcast == "numpy") { + if (outputShapes.size() < conditionShapes.size() || outputShapes.size() < thenShapes.size() || outputShapes.size() < elseShapes.size()) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name << "' and auto_broadcast='numpy' has incompatible input and output shapes"; + + for (int condIt = conditionShapes.size() - 1, outIt = outputShapes.size() - 1; condIt >= 0; condIt--, outIt--) + if (conditionShapes[condIt] != outputShapes[outIt] && conditionShapes[condIt] != 1) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name + << "' and auto_broadcast='numpy' has incompatible 'Condition' input and output shapes"; + + for (int thenIt = thenShapes.size() - 1, outIt = outputShapes.size() - 1; thenIt >= 0; thenIt--, outIt--) + if (thenShapes[thenIt] != outputShapes[outIt] && thenShapes[thenIt] != 1) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name + << "' and auto_broadcast='numpy' has incompatible 'Then' input and output shapes"; + + + for (int elseIt = elseShapes.size() - 1, outIt = outputShapes.size() - 1; elseIt >= 0; elseIt--, outIt--) + if (elseShapes[elseIt] != outputShapes[outIt] && elseShapes[elseIt] != 1) + THROW_IE_EXCEPTION << "Select layer with name '" << layer->name + << "' and auto_broadcast='numpy' has incompatible 'Else' input and output shapes"; } - addConfig(layer, {{ConfLayout::PLN, false}, - {ConfLayout::PLN, false}, - {ConfLayout::PLN, false}}, - {{ConfLayout::PLN, false}}); - } catch (InferenceEngine::details::InferenceEngineException &ex) { - errorMsg = ex.what(); - } - } + resDims.resize(numOfDims, 1); + std::copy(std::begin(outputShapes), std::end(outputShapes), std::begin(resDims) + (numOfDims - outputShapes.size())); + if (broadcast == "numpy") { + calcOutOffset(resOffset, resDims); - template - void execute_impl(std::vector& inputs, Blob::Ptr& output) noexcept { - auto *conditionData = inputs[condition]->cbuffer().as(); - auto *thenData = 
inputs[then_]->cbuffer().as(); - auto *elseData = inputs[else_]->cbuffer().as(); - - auto *dstData = output->cbuffer().as(); - enum {N, C, H, W, Dims}; - int dim[Dims] = {1, 1, 1, 1}; - int cdim[Dims] = {1, 1, 1, 1}; - - SizeVector dims = inputs[then_]->getTensorDesc().getDims(); - std::copy(std::begin(dims), std::end(dims), std::begin(dim) + (Dims - dims.size())); - - SizeVector cDims = inputs[condition]->getTensorDesc().getDims(); - std::copy(std::begin(cDims), std::end(cDims), std::begin(cdim) + (Dims - cDims.size())); - - parallel_for3d(dim[N], dim[H], dim[W], [&](int b, int h, int w) { - for (int c = 0; c < dim[C]; c++) { - dstData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w] - = conditionData[(b % cdim[N])*cdim[C]*cdim[H]*cdim[W] + - (c % cdim[C])*cdim[H]*cdim[W] + - (h % cdim[H])*cdim[W] + - (w % cdim[W])] - ? thenData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w] - : elseData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w]; + std::vector condDims(numOfDims, 1); + std::copy(std::begin(conditionShapes), std::end(conditionShapes), std::begin(condDims) + (numOfDims - conditionShapes.size())); + calcInOffset(condOffset, condDims, resDims); + + std::vector thenDims(numOfDims, 1); + std::copy(std::begin(thenShapes), std::end(thenShapes), std::begin(thenDims) + (numOfDims - thenShapes.size())); + calcInOffset(thenOffset, thenDims, resDims); + + std::vector elseDims(numOfDims, 1); + std::copy(std::begin(elseShapes), std::end(elseShapes), std::begin(elseDims) + (numOfDims - elseShapes.size())); + calcInOffset(elseOffset, elseDims, resDims); } - }); - } - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - auto &outputData = outputs[0]; + LayerConfig config; + for (size_t i = 0; i < numOfInputs; i++) { + DataConfig inConfig; + inConfig.inPlace = -1; + inConfig.constant = false; - auto cond_precision = inputs[condition]->getTensorDesc().getPrecision(); - auto data_precision = 
inputs[then_]->getTensorDesc().getPrecision(); + Precision inPrecision = layer->insData[i].lock()->getTensorDesc().getPrecision(); + const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims(); + inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims)); - auto compare = getPrecisionMask(cond_precision, data_precision); - switch (compare) { - /* 64 bit data type */ - case getPrecisionMask(Precision::I32, Precision::I64): - execute_impl(inputs, outputData); - break; - case getPrecisionMask(Precision::U8, Precision::I64): - execute_impl(inputs, outputData); - break; - case getPrecisionMask(Precision::I32, Precision::U64): - execute_impl(inputs, outputData); - break; - case getPrecisionMask(Precision::U8, Precision::U64): - execute_impl(inputs, outputData); - break; + config.inConfs.push_back(inConfig); + } - /* 32 bit data type */ - case getPrecisionMask(Precision::I32, Precision::FP32): - case getPrecisionMask(Precision::I32, Precision::I32): - execute_impl(inputs, outputData); - break; - case getPrecisionMask(Precision::U8, Precision::FP32): - case getPrecisionMask(Precision::U8, Precision::I32): - execute_impl(inputs, outputData); - break; + DataConfig outConfig; + outConfig.inPlace = -1; + outConfig.constant = false; + Precision outPrecision = layer->insData[1].lock()->getTensorDesc().getPrecision(); + const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims(); + outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims)); + config.outConfs.push_back(outConfig); - /* 16 bit data type */ - case getPrecisionMask(Precision::I32, Precision::FP16): - case getPrecisionMask(Precision::I32, Precision::Q78): - case getPrecisionMask(Precision::I32, Precision::I16): - case getPrecisionMask(Precision::I32, Precision::U16): - execute_impl(inputs, outputData); - break; - case getPrecisionMask(Precision::U8, Precision::FP16): - case 
getPrecisionMask(Precision::U8, Precision::Q78): - case getPrecisionMask(Precision::U8, Precision::I16): - case getPrecisionMask(Precision::U8, Precision::U16): - execute_impl(inputs, outputData); - break; + config.dynBatchSupport = false; + confs.push_back(config); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } - /* 8 bit data type */ - case getPrecisionMask(Precision::I32, Precision::I8): - case getPrecisionMask(Precision::I32, Precision::U8): - execute_impl(inputs, outputData); + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { + auto &outputData = outputs[0]; + const size_t condPrecSize = inputs[CONDITION]->getTensorDesc().getPrecision().size(); + const size_t inputsPrecSize = inputs[THEN]->getTensorDesc().getPrecision().size(); + + switch (condPrecSize) { + case 1: { + switch (inputsPrecSize) { + case 1: { execute_impl(inputs, outputData); break; } + case 2: { execute_impl(inputs, outputData); break; } + case 4: { execute_impl(inputs, outputData); break; } + case 8: { execute_impl(inputs, outputData); break; } + default: { + if (resp) { + std::string errorMsg = "Select layer doesn't support 'Then' and 'Else' inputs' precision: " + + std::string(inputs[THEN]->getTensorDesc().getPrecision().name()); + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return GENERAL_ERROR; + } + } break; - case getPrecisionMask(Precision::U8, Precision::I8): - case getPrecisionMask(Precision::U8, Precision::U8): - execute_impl(inputs, outputData); + } + case 4: { + switch (inputsPrecSize) { + case 1: { execute_impl(inputs, outputData); break; } + case 2: { execute_impl(inputs, outputData); break; } + case 4: { execute_impl(inputs, outputData); break; } + case 8: { execute_impl(inputs, outputData); break; } + default: { + if (resp) { + std::string errorMsg = "Select layer doesn't support 'Then' and 'Else' inputs' precision: " + + 
std::string(inputs[THEN]->getTensorDesc().getPrecision().name()); + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return GENERAL_ERROR; + } + } break; - - default: + } + default: { if (resp) { - std::string errorMsg = "Incorrect Reduce layer type"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + std::string errorMsg = "Select layer doesn't support 'Condition' inputs' precision: " + + std::string(inputs[CONDITION]->getTensorDesc().getPrecision().name()); + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); } return GENERAL_ERROR; + } } - return OK; } -}; +private: + void calcOutOffset(std::vector& offset, const std::vector& dims) { + offset.resize(numOfDims); + int k = 1; + for (int i = dims.size() - 1; i >= 0; i--) { + offset[i] = k; + k *= dims[i]; + } + } + + void calcInOffset(std::vector& offset, const std::vector& inDims, const std::vector& outDims) { + offset.resize(numOfDims); + int k = 1; + for (int i = inDims.size() - 1; i >= 0; i--) { + offset[i] = (inDims[i] == outDims[i]) ? k : 0; + k *= inDims[i]; + } + } + + template + void execute_impl(std::vector& inputs, Blob::Ptr& output) noexcept { + auto *conditionData = inputs[CONDITION]->cbuffer().as() + inputs[CONDITION]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + auto *thenData = inputs[THEN]->cbuffer().as() + inputs[THEN]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + auto *elseData = inputs[ELSE]->cbuffer().as() + inputs[ELSE]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + auto *dstData = output->buffer().as() + output->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + if (broadcast == "none") { + size_t dstDataSize = std::accumulate(begin(resDims), end(resDims), 1, std::multiplies()); + parallel_for(dstDataSize, [&](size_t i) { + dstData[i] = conditionData[i] ? 
thenData[i] : elseData[i]; + }); + } else { + parallel_for4d(resDims[N], resDims[C], resDims[D], resDims[H], [&](int b, int c, int d, int h) { + for (int w = 0; w < resDims[W]; w++) { + size_t indexOut = b * resOffset[N] + c * resOffset[C] + d * resOffset[D] + h * resOffset[H] + w * resOffset[W]; + size_t indexCond = b * condOffset[N] + c * condOffset[C] + d * condOffset[D] + h * condOffset[H] + w * condOffset[W]; + size_t indexThen = b * thenOffset[N] + c * thenOffset[C] + d * thenOffset[D] + h * thenOffset[H] + w * thenOffset[W]; + size_t indexElse = b * elseOffset[N] + c * elseOffset[C] + d * elseOffset[D] + h * elseOffset[H] + w * elseOffset[W]; + dstData[indexOut] = conditionData[indexCond] ? thenData[indexThen] : elseData[indexElse]; + } + }); + } + } +}; REG_FACTORY_FOR(ImplFactory, Select); } // namespace Cpu diff --git a/inference-engine/src/transformations/include/transformations/convert_reduce_to_pooling.hpp b/inference-engine/src/transformations/include/transformations/convert_reduce_to_pooling.hpp index 4b6a0f96ffe6ee..f3eef77a696ef8 100644 --- a/inference-engine/src/transformations/include/transformations/convert_reduce_to_pooling.hpp +++ b/inference-engine/src/transformations/include/transformations/convert_reduce_to_pooling.hpp @@ -54,7 +54,7 @@ void ngraph::pass::ConvertReduceToPooling::convert_reduce_to_pooling() { return false; } - auto input = reduce->input(0).get_source_output().get_node_shared_ptr(); + auto input = reduce->input_value(0); auto axes_node = reduce->input(1).get_source_output().get_node_shared_ptr(); if (!std::dynamic_pointer_cast(axes_node)) { @@ -74,7 +74,7 @@ void ngraph::pass::ConvertReduceToPooling::convert_reduce_to_pooling() { // If axes are empty we just remove Reduction operation if (axes_vector.empty()) { - replace_node(reduce, input); + replace_node(reduce, {input}); return true; } @@ -166,9 +166,9 @@ void ngraph::pass::ConvertReduceToPooling::convert_reduce_to_pooling() { * Note: some of reshape nodes can be optimized 
if they do nothing. */ - if (!shape_begin.empty() && shape_begin != input->output(0).get_shape()) { + if (!shape_begin.empty() && shape_begin != input.get_shape()) { input = std::make_shared(input, opset1::Constant::create(element::i64, Shape{shape_begin.size()}, shape_begin), true); - input->set_friendly_name(reduce->get_friendly_name() + "/reshape_begin"); + input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "/reshape_begin"); } if (std::is_same()) { @@ -180,7 +180,7 @@ void ngraph::pass::ConvertReduceToPooling::convert_reduce_to_pooling() { true, op::RoundingType::FLOOR); - input->set_friendly_name(reduce->get_friendly_name() + "/pool"); + input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "/pool"); } else if (std::is_same()) { input = std::make_shared(input, strides, @@ -189,7 +189,7 @@ void ngraph::pass::ConvertReduceToPooling::convert_reduce_to_pooling() { kernel, op::RoundingType::FLOOR); - input->set_friendly_name(reduce->get_friendly_name() + "/pool"); + input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "/pool"); } else if (std::is_same()) { input = std::make_shared(input, strides, @@ -199,21 +199,21 @@ void ngraph::pass::ConvertReduceToPooling::convert_reduce_to_pooling() { true, op::RoundingType::FLOOR); - input->set_friendly_name(reduce->get_friendly_name() + "/pool"); + input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "/pool"); input = std::make_shared(input, opset1::Constant::create(reduce->input(0).get_element_type(), Shape{1}, {reduction_dims_count})); - input->set_friendly_name(reduce->get_friendly_name() + "/mul"); + input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "/mul"); } else { return false; } - if (!shape_end.empty() && shape_end != input->output(0).get_shape()) { + if (!shape_end.empty() && shape_end != input.get_shape()) { input = std::make_shared(input, opset1::Constant::create(element::i64, 
Shape{shape_end.size()}, shape_end), true); } - input->set_friendly_name(reduce->get_friendly_name()); + input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name()); - replace_node(reduce, input); + replace_node(reduce, {input}); return true; }; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp index 6d2c5a71b168dc..a4ac2c5e39baeb 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp @@ -22,6 +22,7 @@ namespace vpu { namespace MyriadPlugin { ExecutableNetwork::ExecutableNetwork( + std::shared_ptr mvnc, std::vector& devicePool, const MyriadConfig& config) : _config(config) { @@ -32,7 +33,7 @@ ExecutableNetwork::ExecutableNetwork( _config.logLevel(), defaultOutput(_config.pluginLogFilePath())); - _executor = std::make_shared(_config.forceReset(), _config.logLevel(), _log); + _executor = std::make_shared(_config.forceReset(), std::move(mvnc), _config.logLevel(), _log); _device = _executor->openDevice(devicePool, _config); const auto& compileConfig = config.compileConfig(); @@ -49,9 +50,11 @@ ExecutableNetwork::ExecutableNetwork( } ExecutableNetwork::ExecutableNetwork( - ICNNNetwork& network, std::vector& devicePool, + ICNNNetwork& network, + std::shared_ptr mvnc, + std::vector& devicePool, const MyriadConfig& config) : - ExecutableNetwork(devicePool, config) { + ExecutableNetwork(std::move(mvnc), devicePool, config) { VPU_PROFILE(ExecutableNetwork); const auto compilerLog = std::make_shared( @@ -139,18 +142,20 @@ void ExecutableNetwork::Import(std::istream& strm, } ExecutableNetwork::ExecutableNetwork(std::istream& strm, + std::shared_ptr mvnc, std::vector &devicePool, const MyriadConfig& config) : - ExecutableNetwork(devicePool, config) { + ExecutableNetwork(std::move(mvnc), devicePool, config) { VPU_PROFILE(ExecutableNetwork); Import(strm, 
devicePool, config); } ExecutableNetwork::ExecutableNetwork( const std::string& blobFilename, + std::shared_ptr mvnc, std::vector& devicePool, const MyriadConfig& config) : - ExecutableNetwork(devicePool, config) { + ExecutableNetwork(std::move(mvnc), devicePool, config) { VPU_PROFILE(ExecutableNetwork); std::ifstream blobFile{blobFilename, std::ios::binary}; Import(blobFile, devicePool, config); diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h index 2e03ea02f9e1d1..1e106c06cbc6ab 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h @@ -33,14 +33,17 @@ class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDef typedef std::shared_ptr Ptr; explicit ExecutableNetwork(InferenceEngine::ICNNNetwork &network, + std::shared_ptr mvnc, std::vector &devicePool, const MyriadConfig& config); explicit ExecutableNetwork(std::istream& strm, + std::shared_ptr mvnc, std::vector &devicePool, const MyriadConfig& config); explicit ExecutableNetwork(const std::string &blobFilename, + std::shared_ptr mvnc, std::vector &devicePool, const MyriadConfig& config); @@ -126,8 +129,9 @@ class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDef const size_t _maxTaskExecutorGetResultCount = 1; std::queue _taskExecutorGetResultIds; - ExecutableNetwork(std::vector &devicePool, - const MyriadConfig& config); + ExecutableNetwork(std::shared_ptr mvnc, + std::vector &devicePool, + const MyriadConfig& config); InferenceEngine::ITaskExecutor::Ptr getNextTaskExecutor() { std::string id = _taskExecutorGetResultIds.front(); diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp index f553c18edcb7a3..9b7b96cfabdd48 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp +++ 
b/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp @@ -36,9 +36,10 @@ using namespace vpu; static std::mutex device_mutex; -MyriadExecutor::MyriadExecutor(bool forceReset, const LogLevel& vpuLogLevel, const Logger::Ptr& log) : _log(log) { +MyriadExecutor::MyriadExecutor(bool forceReset, std::shared_ptr mvnc, + const LogLevel& vpuLogLevel, const Logger::Ptr& log) : _log(log), _mvnc(std::move(mvnc)) { VPU_PROFILE(MyriadExecutor); - _mvnc = std::make_shared(); + VPU_THROW_UNLESS(_mvnc, "mvnc is null"); int ncResetAll = forceReset; auto status = ncGlobalSetOption(NC_RW_RESET_ALL, &ncResetAll, sizeof(ncResetAll)); if (status != NC_OK) { @@ -136,12 +137,17 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, return statusOpen; } + ncDeviceOpenParams_t deviceOpenParams = {}; + deviceOpenParams.watchdogHndl = _mvnc->watchdogHndl(); + deviceOpenParams.watchdogInterval = config.watchdogInterval().count(); + deviceOpenParams.customFirmwareDirectory = dirName.c_str(); + // Open new device with specific path to FW folder statusOpen = ncDeviceOpen(&device._deviceHandle, - in_deviceDesc, config.watchdogInterval().count(), dirName.c_str()); + in_deviceDesc, deviceOpenParams); if (statusOpen != NC_OK) { - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return statusOpen; } @@ -154,7 +160,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, reinterpret_cast(&device._platform), &dataLength); if (status != NC_OK || dataLength != sizeof(device._platform)) { _log->warning("Failed to get device platform"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status != NC_OK ? 
status : NC_ERROR; // for dataLength error } @@ -163,7 +169,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, reinterpret_cast(&device._protocol), &dataLength); if (status != NC_OK || dataLength != sizeof(device._protocol)) { _log->warning("Failed to get device protocol"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status != NC_OK ? status : NC_ERROR; // for dataLength error } @@ -173,7 +179,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, reinterpret_cast(&device._maxGraphNum), &dataLength); if (status != NC_OK || dataLength != sizeof(device._maxGraphNum)) { _log->warning("Failed to get maximum supported number of graphs"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status != NC_OK ? status : NC_ERROR; // for dataLength error } @@ -184,7 +190,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, reinterpret_cast(&deviceName), &dataLength); if (status != NC_OK || dataLength > NC_MAX_NAME_SIZE) { _log->warning("Failed to get name of booted device"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status != NC_OK ? 
status : NC_ERROR; // for dataLength error } else { device._name = deviceName; @@ -194,7 +200,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, if (status != NC_OK) { _log->warning("Failed to set configuration for Power Manager"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status; } @@ -283,12 +289,12 @@ VPU_PACKED(bin_header { uint32_t frequency; };) -void MyriadExecutor::closeDevices(std::vector &devicePool) { +void MyriadExecutor::closeDevices(std::vector &devicePool, std::shared_ptr mvnc) { VPU_PROFILE(closeDevices); std::lock_guard lock(device_mutex); for (auto &device : devicePool) { if (device->_deviceHandle != nullptr) { - auto res = ncDeviceClose(&(device->_deviceHandle)); + auto res = ncDeviceClose(&(device->_deviceHandle), mvnc->watchdogHndl()); if (res != NC_OK) printf("ncDeviceClose failed (%d)\n", static_cast(res)); device->_deviceHandle = nullptr; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executor.h b/inference-engine/src/vpu/myriad_plugin/myriad_executor.h index 9aa0d89888858a..d231c45b4439c7 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executor.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executor.h @@ -78,7 +78,8 @@ class MyriadExecutor { unsigned int _numStages = 0; public: - MyriadExecutor(bool forceReset, const LogLevel& vpuLogLevel, const Logger::Ptr& log); + MyriadExecutor(bool forceReset, std::shared_ptr mvnc, + const LogLevel& vpuLogLevel, const Logger::Ptr& log); ~MyriadExecutor() = default; /** @@ -87,7 +88,7 @@ class MyriadExecutor { */ DevicePtr openDevice(std::vector &devicePool, const MyriadConfig& config); - static void closeDevices(std::vector &devicePool); + static void closeDevices(std::vector &devicePool, std::shared_ptr mvnc); void allocateGraph(DevicePtr &device, GraphDesc &graphDesc, diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.cpp 
b/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.cpp index 40c83c4125cab9..5ba91ef4f86d65 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.cpp @@ -11,20 +11,24 @@ using namespace vpu::MyriadPlugin; // Implementation of methods of class Mvnc //------------------------------------------------------------------------------ -Mvnc::Mvnc() : - _devicesPtr(new struct ncDeviceDescr_t[NC_MAX_DEVICES]) { +Mvnc::Mvnc() { + WatchdogHndl_t* watchdogHndl = nullptr; + if (watchdog_create(&watchdogHndl) != WD_ERRNO) { + THROW_IE_EXCEPTION << "Cannot create watchdog."; + } + + m_watcdogPtr = WatchdogUniquePtr(watchdogHndl, [](WatchdogHndl_t* watchdogHndl) { + watchdog_destroy(watchdogHndl); + }); } std::vector Mvnc::AvailableDevicesDesc() const { int deviceCount = 0; - if (ncAvailableDevices(_devicesPtr.get(), NC_MAX_DEVICES, &deviceCount) != NC_OK) { + std::vector availableDevices(NC_MAX_DEVICES); + if (ncAvailableDevices(&availableDevices[0], NC_MAX_DEVICES, &deviceCount) != NC_OK) { THROW_IE_EXCEPTION << "Cannot receive available devices."; } - - std::vector availableDevices; - for (int i = 0; i < deviceCount; ++i) { - availableDevices.push_back(_devicesPtr[i]); - } + availableDevices.resize(deviceCount); return availableDevices; } @@ -34,7 +38,7 @@ std::vector Mvnc::AvailableDevicesNames() const { std::vector availableDevices; for (size_t i = 0; i < _availableDevicesDesc.size(); ++i) { - availableDevices.emplace_back(std::string(_devicesPtr[i].name)); + availableDevices.emplace_back(std::string(_availableDevicesDesc[i].name)); } return availableDevices; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.h b/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.h index 27673ce873fef1..43fcaed69d6e89 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.h @@ -4,15 +4,19 @@ 
#pragma once +#include +#include + #include #include #include #include -#include namespace vpu { namespace MyriadPlugin { +using WatchdogUniquePtr = std::unique_ptr>; + //------------------------------------------------------------------------------ // class IMvnc // This is a class interface for accessing devices. @@ -24,6 +28,8 @@ class IMvnc { virtual std::vector AvailableDevicesDesc() const = 0; virtual std::vector AvailableDevicesNames() const = 0; + virtual WatchdogHndl_t* watchdogHndl() = 0; + // Destructor virtual ~IMvnc() = default; }; @@ -35,19 +41,19 @@ class IMvnc { class Mvnc : public IMvnc { public: - // Constructor Mvnc(); + ~Mvnc() override = default; // Operations std::vector AvailableDevicesDesc() const override; std::vector AvailableDevicesNames() const override; - // Destructor - ~Mvnc() override = default; + WatchdogHndl_t* watchdogHndl() override { + return m_watcdogPtr.get(); + } private: - // Data section - std::unique_ptr _devicesPtr; + WatchdogUniquePtr m_watcdogPtr; }; } // namespace MyriadPlugin diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp index a4b575d1186d2e..46ccebf9756f14 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp @@ -41,7 +41,7 @@ ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl( clonedNetwork = cloneNet(network); } - return std::make_shared(*clonedNetwork, _devicePool, parsedConfigCopy); + return std::make_shared(*clonedNetwork, _mvnc, _devicePool, parsedConfigCopy); } void Engine::SetConfig(const std::map &config) { @@ -95,9 +95,7 @@ void Engine::QueryNetwork( Engine::Engine(std::shared_ptr mvnc) : _mvnc(std::move(mvnc)), _metrics(std::make_shared()) { - if (!_mvnc) { - THROW_IE_EXCEPTION << "mvnc is invalid"; - } + VPU_THROW_UNLESS(_mvnc, "mvnc is null"); _pluginName = "MYRIAD"; @@ -126,7 +124,7 @@ InferenceEngine::ExecutableNetwork 
Engine::ImportNetwork( const auto executableNetwork = std::make_shared( - model, _devicePool, parsedConfigCopy); + model, _mvnc, _devicePool, parsedConfigCopy); return InferenceEngine::ExecutableNetwork{IExecutableNetwork::Ptr( new ExecutableNetworkBase(executableNetwork), diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h index cbbd12f4aafa62..ae6964f14e1e22 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h @@ -22,7 +22,7 @@ class Engine : public ie::InferencePluginInternal { explicit Engine(std::shared_ptr mvnc); ~Engine() override { - MyriadExecutor::closeDevices(_devicePool); + MyriadExecutor::closeDevices(_devicePool, _mvnc); } void SetConfig(const std::map& config) override; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/select.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/select.cpp new file mode 100644 index 00000000000000..6ee640f4a121de --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/select.cpp @@ -0,0 +1,86 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/select.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +const std::vector inputPrecision = { + InferenceEngine::Precision::I8, + InferenceEngine::Precision::I16, + InferenceEngine::Precision::I32, + InferenceEngine::Precision::FP32 + // CPU plug-in doesn't support I64 and U64 precisions at the moment + // InferenceEngine::Precision::I64 +}; + +const std::vector>> noneShapes = { + {{1}, {1}, {1}}, + {{8}, {8}, {8}}, + {{4, 5}, {4, 5}, {4, 5}}, + {{3, 4, 5}, {3, 4, 5}, {3, 4, 5}}, + {{2, 3, 4, 5}, {2, 3, 4, 5}, {2, 3, 4, 5}}, + {{2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 
4, 5, 6}} +}; + +const auto noneCases = ::testing::Combine( + ::testing::ValuesIn(noneShapes), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(ngraph::op::AutoBroadcastSpec::NONE), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const std::vector>> numpyShapes = { + {{1}, {1}, {1}}, + {{1}, {16}, {1}}, + {{1}, {1}, {16}}, + {{1}, {8}, {8}}, + {{8}, {1}, {8}}, + {{8}, {8}, {8}}, + {{4, 1}, {1}, {4, 8}}, + {{3, 8}, {8}, {3, 1}}, + {{8, 1}, {8, 1}, {8, 1}}, + {{1}, {5, 8}, {5, 8}}, + {{8, 1, 1}, {8, 1, 1}, {2, 5}}, + {{8, 1}, {6, 8, 1}, {6, 1, 1}}, + {{5, 1}, {8, 1, 7}, {5, 7}}, + {{2, 8, 1}, {2, 8, 9}, {2, 1, 9}}, + {{1, 4}, {8, 1, 1, 1}, {4}}, + {{5, 4, 1}, {8, 5, 1, 1}, {4, 1}}, + {{1, 4}, {6, 1, 8, 1}, {6, 1, 8, 4}}, + {{7, 3, 1, 8}, {7, 1, 1, 8}, {3, 2, 8}}, + {{1, 3, 1}, {8, 2, 3, 1}, {3, 9}}, + {{5, 1, 8}, {2, 1, 9, 8}, {2, 5, 9, 8}}, + {{6, 1, 1, 8}, {6, 7, 1, 8}, {2, 1}}, + {{5, 1, 1, 1}, {5, 7, 8, 6}, {1, 8, 6}}, + {{8, 1, 5}, {8, 1, 1, 1, 1}, {8, 7, 5}}, + {{8, 1, 1, 9}, {4, 8, 1, 1, 1}, {1, 1, 9}}, + {{5, 1, 2, 1}, {8, 1, 9, 1, 1}, {5, 1, 2, 1}}, + {{8, 1}, {2, 1, 1, 8, 1}, {9, 1, 1}}, + {{8, 5, 5, 5, 1}, {8, 1, 1, 1, 8}, {5, 5, 5, 8}}, + {{4}, {8, 5, 6, 1, 1}, {2, 4}}, + {{9, 9, 2, 8, 1}, {9, 1, 2, 8, 1}, {9, 1, 1, 1}}, + {{5, 3, 3}, {8, 1, 1, 3, 3}, {5, 1, 3}}, + {{5, 1, 8, 1}, {5, 5, 1, 8, 1}, {1}}, + {{3}, {6, 8, 1, 1, 3}, {6, 1, 5, 3, 3}}, + {{5, 1}, {3, 1, 4, 1, 8}, {1, 4, 5, 8}}, + {{2, 1, 5}, {8, 6, 2, 3, 1}, {5}}, + {{6}, {2, 1, 9, 8, 6}, {2, 4, 9, 8, 6}}, + {{5, 7, 1, 8, 1}, {5, 7, 1, 8, 4}, {8, 1}}, + {{7, 6, 5, 8}, {4, 7, 6, 5, 8}, {6, 1, 8}} +}; + +const auto numpyCases = ::testing::Combine( + ::testing::ValuesIn(numpyShapes), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(ngraph::op::AutoBroadcastSpec::NUMPY), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsSelect_none, SelectLayerTest, noneCases, SelectLayerTest::getTestCaseName); + 
+INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsSelect_numpy, SelectLayerTest, numpyCases, SelectLayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/select.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/select.cpp new file mode 100644 index 00000000000000..63018e38eab730 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/select.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/select.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +const std::vector inputPrecision = { + InferenceEngine::Precision::U8, + InferenceEngine::Precision::I16, + InferenceEngine::Precision::FP32 +}; + +const std::vector>> noneShapes = { + {{1}, {1}, {1}}, + {{8}, {8}, {8}}, + {{4, 5}, {4, 5}, {4, 5}}, + {{3, 4, 5}, {3, 4, 5}, {3, 4, 5}}, + {{2, 3, 4, 5}, {2, 3, 4, 5}, {2, 3, 4, 5}} +}; + +const auto noneCases = ::testing::Combine( + ::testing::ValuesIn(noneShapes), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(ngraph::op::AutoBroadcastSpec::NONE), + ::testing::Values(CommonTestUtils::DEVICE_GPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_CLDNN_TestsSelect_none, SelectLayerTest, noneCases, SelectLayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/select.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/select.hpp new file mode 100644 index 00000000000000..6afa81f7528126 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/select.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "ngraph_functions/select.hpp" 
+ +namespace LayerTestsDefinitions { + +typedef std::tuple< + std::vector>, // mask, then, else shapes + InferenceEngine::Precision, // then, else precision + ngraph::op::AutoBroadcastSpec, // broadcast + std::string> select_test_params; // Device name + +class SelectLayerTest : public LayerTestsUtils::LayerTestsCommonClass { +public: + NGraphFunctions::Select layer; + std::vector> inputShapes; + ngraph::op::AutoBroadcastSpec broadcast; + + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/select.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/select.cpp new file mode 100644 index 00000000000000..c74e382353fd53 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/select.cpp @@ -0,0 +1,99 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include +#include + +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/precision_utils.hpp" +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/skip_tests_config.hpp" +#include "functional_test_utils/plugin_cache.hpp" + +#include "single_layer_tests/select.hpp" + +namespace LayerTestsDefinitions { + + std::string SelectLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector> dataShapes(3); + InferenceEngine::Precision dataType; + ngraph::op::AutoBroadcastSpec broadcast; + std::string targetDevice; + std::tie(dataShapes, dataType, broadcast, targetDevice) = obj.param; + std::ostringstream result; + result << "COND=BOOL_" << CommonTestUtils::vec2str(dataShapes[0]); + result << "_THEN=" << dataType.name() << "_" << CommonTestUtils::vec2str(dataShapes[1]); + result << "_ELSE=" << dataType.name() 
<< "_" << CommonTestUtils::vec2str(dataShapes[2]); + result << "_" << broadcast.m_type; + result << "_targetDevice=" << targetDevice; + return result.str(); + } + + void SelectLayerTest::SetUp() { + inputShapes.resize(NGraphFunctions::Select::numOfInputs); + std::tie(inputShapes, inputPrecision, broadcast, targetDevice) = this->GetParam(); + layer = NGraphFunctions::Select(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision), inputShapes, broadcast); + } + + TEST_P(SelectLayerTest, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + InferenceEngine::CNNNetwork cnnNet(layer.fnPtr); + + auto outputName = cnnNet.getOutputsInfo().begin()->first; + + auto ie = PluginCache::get().ie(); + auto execNet = ie->LoadNetwork(cnnNet, targetDevice); + auto req = execNet.CreateInferRequest(); + + std::vector inBlobs; + + std::vector range = {2, 30, 30}; + std::vector startFrom = {0, 0, 30}; + int i = 0; + for (const auto &inputItem : cnnNet.getInputsInfo()) { + auto currentBlob = FuncTestUtils::createAndFillBlob(inputItem.second->getTensorDesc(), range[i], startFrom[i]); + req.SetBlob(inputItem.first, currentBlob); + inBlobs.push_back(currentBlob); + i++; + } + + std::vector castedBlobs = inBlobs; + std::vector inRawData; + for (size_t i = 0; i < castedBlobs.size(); i++) { + castedBlobs[i] = FuncTestUtils::copyBlobWithCast(inBlobs[i]); + inRawData.push_back(castedBlobs[i]->cbuffer().as()); + } + + req.Infer(); + + auto outBlob = req.GetBlob(outputName); + auto resShape = outBlob->getTensorDesc().getDims(); + const auto& outPrecision = outBlob->getTensorDesc().getPrecision(); + + size_t outElementsCount = std::accumulate(begin(resShape), end(resShape), 1, std::multiplies()); + std::vector refOutData = layer.RefImpl(inRawData, inputShapes, resShape); + + if (outPrecision != InferenceEngine::Precision::I32 && outPrecision != InferenceEngine::Precision::FP32) + THROW_IE_EXCEPTION << "Test for select layer doesn't support output precision different from I32 or 
FP32"; + + if (outPrecision == InferenceEngine::Precision::I32) { + std::vector convRefOutData(outElementsCount); + for (size_t i = 0; i < outElementsCount; i++) + convRefOutData[i] = static_cast(refOutData[i]); + FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as(), convRefOutData.data(), outElementsCount, outElementsCount); + } else { + auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32); + FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as(), refOutData.data(), outElementsCount, outElementsCount, thr); + } + + layer.fnPtr.reset(); + } + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp b/inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp index 406c5b1d76b5fc..dc560327fff874 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp @@ -224,6 +224,12 @@ InferenceEngine::Blob::Ptr inline copyBlobWithCast(const InferenceEngine::Blob:: case InferenceEngine::Precision::U8: newBlob = FuncTestUtils::convertBlobPrecision(blob); break; + case InferenceEngine::Precision::I32: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; + case InferenceEngine::Precision::BOOL: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; default: THROW_IE_EXCEPTION << "Conversion from blob with precision " << blob->getTensorDesc().getPrecision().name() << " not implemented yet!"; @@ -247,6 +253,8 @@ InferenceEngine::Blob::Ptr inline createAndFillBlob(const InferenceEngine::Tenso CASE(InferenceEngine::Precision::I16); CASE(InferenceEngine::Precision::I64); CASE(InferenceEngine::Precision::BIN); + CASE(InferenceEngine::Precision::I32); + CASE(InferenceEngine::Precision::BOOL); #undef CASE default: THROW_IE_EXCEPTION << "Wrong precision specified: " << td.getPrecision().name(); diff --git 
a/inference-engine/tests/ngraph_functions/include/ngraph_functions/select.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/select.hpp new file mode 100644 index 00000000000000..36c1baca682b4d --- /dev/null +++ b/inference-engine/tests/ngraph_functions/include/ngraph_functions/select.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace NGraphFunctions { + +class Select { +public: + enum { CONDITION, THEN, ELSE, numOfInputs }; + std::shared_ptr fnPtr; + + Select() = default; + + explicit Select(ngraph::element::Type inType, const std::vector> &inputShapes, ngraph::op::AutoBroadcastSpec broadcast); + + template + std::vector RefImpl(const std::vector &inData, const std::vector> &inDataShapes, + const std::vector &outputShapes) { + size_t outElementsCount = std::accumulate(begin(outputShapes), end(outputShapes), 1, std::multiplies()); + + std::vector shapes; + for (auto shape : inDataShapes) + shapes.push_back(ngraph::Shape(shape)); + + size_t maskElementsCount = std::accumulate(begin(inDataShapes[CONDITION]), end(inDataShapes[CONDITION]), 1, std::multiplies()); + std::vector mask(maskElementsCount); + for (size_t i = 0; i < maskElementsCount; i++) + mask[i] = static_cast(inData[CONDITION][i]); + + std::vector dstData(outElementsCount); + ngraph::runtime::reference::select(mask.data(), inData[THEN], inData[ELSE], dstData.data(), shapes[CONDITION], shapes[THEN], shapes[ELSE], + broadcastType); + + return dstData; + } + +private: + ngraph::op::AutoBroadcastSpec broadcastType; +}; + +} // namespace NGraphFunctions diff --git a/inference-engine/tests/ngraph_functions/src/select.cpp b/inference-engine/tests/ngraph_functions/src/select.cpp new file mode 100644 index 00000000000000..fc2127f8722c85 --- /dev/null +++ b/inference-engine/tests/ngraph_functions/src/select.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2020 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "ngraph_functions/select.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +namespace NGraphFunctions { + + Select::Select(ngraph::element::Type inType, const std::vector> &inputShapes, ngraph::op::AutoBroadcastSpec broadcast) { + ngraph::ParameterVector paramNodesVector; + + auto paramNode = std::make_shared(ngraph::element::Type_t::boolean, ngraph::Shape(inputShapes[CONDITION])); + paramNodesVector.push_back(paramNode); + for (size_t i = 1; i < inputShapes.size(); i++) { + paramNode = std::make_shared(inType, ngraph::Shape(inputShapes[i])); + paramNodesVector.push_back(paramNode); + } + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramNodesVector)); + broadcastType = broadcast; + + auto SelectNode = std::make_shared(paramOuts[CONDITION], paramOuts[THEN], paramOuts[ELSE], broadcastType); + + auto result = std::make_shared(SelectNode); + + fnPtr = std::make_shared(ngraph::ResultVector{result}, paramNodesVector, "select"); + } + +} // namespace NGraphFunctions \ No newline at end of file diff --git a/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/watchdog_tests.cpp b/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/watchdog_tests.cpp index 3d331635055cde..11ec8f44dc6861 100644 --- a/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/watchdog_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/watchdog_tests.cpp @@ -7,136 +7,93 @@ #include #include #include -#include #include using namespace ::testing; using namespace InferenceEngine; +using ms = std::chrono::milliseconds; + class MockWatchdogDevice : public Watchdog::IDevice { public: using time_point = Watchdog::IDevice::time_point; - MOCK_QUALIFIED_METHOD1(setInterval, noexcept, void(const std::chrono::milliseconds)); MOCK_QUALIFIED_METHOD1(keepAlive, noexcept, void(const time_point &)); MOCK_QUALIFIED_METHOD1(dueIn, const 
noexcept, std::chrono::milliseconds (const time_point ¤t_time)); MOCK_QUALIFIED_METHOD0(isTimeout, const noexcept, bool ()); MOCK_QUALIFIED_METHOD0(getHandle, const noexcept, void* ()); }; -struct wd_context_opaque_private { - void * magic = reinterpret_cast (0xdeadbeaf); - Watchdog::IDevice * actual = nullptr; - bool destroyed = false; -}; - - class MVNCWatchdogTests: public TestsCommon { protected: - devicePrivate_t d; - wd_context ctx, ctx1; + WatchdogHndl_t* m_watchdogHndl = nullptr; + WdDeviceHndl_t deviceHndl, deviceHndl1; StrictMock mockWatchee, mockWatchee1; - wd_context_opaque_private opaque, opaque1; void SetUp() override { - opaque.actual = &mockWatchee; - ctx.opaque = &opaque; - - opaque1.actual = &mockWatchee1; - ctx1.opaque = &opaque1; + deviceHndl.m_device = &mockWatchee; + deviceHndl1.m_device = &mockWatchee1; - pthread_mutex_init(&d.dev_stream_m, nullptr); + ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl)); } + void TearDown() override { - pthread_mutex_destroy(&d.dev_stream_m); + watchdog_destroy(m_watchdogHndl); + } + + void setExpectations(StrictMock& mock){ + EXPECT_CALL(mock, keepAlive(_)).Times(AtLeast(0)); + EXPECT_CALL(mock, dueIn(_)).WillRepeatedly(Return(ms(20000))); + EXPECT_CALL(mock, isTimeout()).WillRepeatedly(Return(false)); + EXPECT_CALL(mock, getHandle()).WillRepeatedly(Return(&mock)); } }; -using ms = std::chrono::milliseconds; TEST_F(MVNCWatchdogTests, canRegisterExternalWatchee) { + setExpectations(mockWatchee); - int handle = 1; - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); - // do not expect that any ping happened before we remove the thread - // this can be changed for example registering succeed only if first ping succeed - EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(0)); - EXPECT_CALL(mockWatchee, setInterval(ms(1))).Times(1); - EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false)); - EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Return(ms(20000))); - - 
d.wd_interval = 1; - - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); // allowing thread spin std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); -} -// TODO: implement logic -TEST_F(MVNCWatchdogTests, DISABLED_removeDeviceIfXLINKSessionNotIninitialized) { - - d.wd_interval = 10; - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); - - std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } -#if defined(__APPLE__) && !defined(NDEBUG) -TEST_F(MVNCWatchdogTests, DISABLED_canNotBeRegisteredTwice) { -#else TEST_F(MVNCWatchdogTests, canNotBeRegisteredTwice) { -#endif + setExpectations(mockWatchee); - d.wd_interval = 10; + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); + ASSERT_NE(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); // allowing thread spin std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); -} - -TEST_F(MVNCWatchdogTests, canUnRegisterNotInitialized) { - - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } -TEST_F(MVNCWatchdogTests, canUnRegisterIfInterval0) { +TEST_F(MVNCWatchdogTests, canNotUnRegisterNotInitialized) { + EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee)); - d.wd_interval = 0; - - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); - ASSERT_EQ(WD_ERRNO, 
watchdog_unregister_device(&ctx)); + ASSERT_NE(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } -#if defined(__APPLE__) && !defined(NDEBUG) -TEST_F(MVNCWatchdogTests, DISABLED_failUnRegisterTwice) { -#else TEST_F(MVNCWatchdogTests, failUnRegisterTwice) { -#endif + setExpectations(mockWatchee); - d.wd_interval = 10; + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); // allowing thread spin std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); - ASSERT_NE(WD_ERRNO, watchdog_unregister_device(&ctx)); + + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); + ASSERT_NE(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } TEST_F(MVNCWatchdogTests, canRemoveOneDeviceFromQueueInCaseOfTimeout) { - int handle = 1; int x = 0; int y = 0; int z = 0; - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); + EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee)); EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(1)); - EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Invoke([&z, &y]() { // will sleep at least 100 ms and avoid second keep alive call y = 100; @@ -151,9 +108,8 @@ TEST_F(MVNCWatchdogTests, canRemoveOneDeviceFromQueueInCaseOfTimeout) { return std::chrono::milliseconds(y); })); - EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&handle)); + EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&mockWatchee1)); EXPECT_CALL(mockWatchee1, keepAlive(_)).Times(AtLeast(2)); - EXPECT_CALL(mockWatchee1, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee1, isTimeout()).WillRepeatedly(Invoke([&x]() { // allow every second time to wait x = x == 0 ? 
100 : 0; @@ -163,201 +119,134 @@ TEST_F(MVNCWatchdogTests, canRemoveOneDeviceFromQueueInCaseOfTimeout) { return std::chrono::milliseconds(x); })); - - d.wd_interval = 10; - - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx1, &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl1)); std::this_thread::sleep_for(ms(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx1)); -} - -TEST_F(MVNCWatchdogTests, canNotStartWatchdogIfIntervalInvalid) { - - opaque.actual = &mockWatchee; - - int handle = 1; - - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); - - d.wd_interval = 0; - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); - - d.wd_interval = -1; - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); - - // if fo some reason thread started we will get unxpected updatePongInterval calls - std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl1)); } TEST_F(MVNCWatchdogTests, canGetPingsOnRegularBasis) { - - int handle = 1; int x = 0; - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); + EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee)); // since interval is small keepAlive can happen several times once EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(2)); - EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Invoke([&x](const MockWatchdogDevice::time_point ¤t_time){ x = x == 0 ? 
100 : 0; return std::chrono::milliseconds(x); })); - - d.wd_interval = 10; - - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); std::this_thread::sleep_for(ms(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } TEST_F(MVNCWatchdogTests, canWakeUpWatchdogWhenAddAndRemoveDevice) { - - int handle = 1, handle1 = 2; - - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); + EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee)); EXPECT_CALL(mockWatchee, keepAlive(_)).Times(1); - EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false)); // without wake this will sleep for ever EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Return(ms(20000))); - EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&handle1)); + EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&mockWatchee1)); EXPECT_CALL(mockWatchee1, keepAlive(_)).Times(1); - EXPECT_CALL(mockWatchee1, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee1, isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee1, dueIn(_)).WillRepeatedly(Return(ms(20000))); - - d.wd_interval = 10; - - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); - + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); std::this_thread::sleep_for(std::chrono::milliseconds(2000)); - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx1, &d)); - + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl1)); std::this_thread::sleep_for(std::chrono::milliseconds(2000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx1)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); + ASSERT_EQ(WD_ERRNO, 
watchdog_unregister_device(m_watchdogHndl, &deviceHndl1)); } TEST_F(MVNCWatchdogTests, stressWatchDog) { - const int num_watchdog_device = 10; - - watchdog_init_context(nullptr); - StrictMock mockWatchee[num_watchdog_device]; - int handle[num_watchdog_device]; - wd_context ctx[num_watchdog_device]; - wd_context_opaque_private opaque[num_watchdog_device]; + WdDeviceHndl_t deviceHndl[num_watchdog_device]; for (int i = 0; i != num_watchdog_device; i++) { - handle[i] = i; - - EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i)); + EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i])); // since interval is big keepAlive happens only once EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1); - EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000))); - } - d.wd_interval = 10; + deviceHndl[i].m_device = &mockWatchee[i]; + } for (int k = 0; k != num_watchdog_device; k++) { - opaque[k].actual = &mockWatchee[k]; - ctx[k].opaque = &opaque[k]; - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = 0; k != num_watchdog_device; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(100)); } TEST_F(MVNCWatchdogTests, stressWatchDog1) { - const int num_watchdog_device = 10; const int num_watchdog_device_half = num_watchdog_device / 2; - watchdog_init_context(nullptr); - StrictMock mockWatchee[num_watchdog_device]; - int handle[num_watchdog_device]; - wd_context ctx[num_watchdog_device]; - wd_context_opaque_private opaque[num_watchdog_device]; + WdDeviceHndl_t 
deviceHndl[num_watchdog_device]; for (int i = 0; i != num_watchdog_device; i++) { - handle[i] = i; - - EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i)); + EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i])); // since interval is big keepAlive happens only once EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1); - EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000))); - } - d.wd_interval = 10; - for (int k = 0; k != num_watchdog_device; k++) { - opaque[k].actual = &mockWatchee[k]; - ctx[k].opaque = &opaque[k]; + deviceHndl[i].m_device = &mockWatchee[i]; } for (int k = 0; k != num_watchdog_device_half; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = 0; k != num_watchdog_device_half; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k + num_watchdog_device_half], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k + num_watchdog_device_half])); std::this_thread::sleep_for(std::chrono::milliseconds(20)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); std::this_thread::sleep_for(std::chrono::milliseconds(20)); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = 0; k != num_watchdog_device_half; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k + num_watchdog_device_half])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k + num_watchdog_device_half])); } std::this_thread::sleep_for(std::chrono::milliseconds(100)); } TEST_F(MVNCWatchdogTests, stressWatchDog2) { - const int num_watchdog_device = 
30; const int num_watchdog_device_half1 = num_watchdog_device / 3; const int num_watchdog_device_half2 = 2 * num_watchdog_device / 3; - watchdog_init_context(nullptr); - StrictMock mockWatchee[num_watchdog_device]; - int handle[num_watchdog_device]; - wd_context ctx[num_watchdog_device]; - wd_context_opaque_private opaque[num_watchdog_device]; + WdDeviceHndl_t deviceHndl[num_watchdog_device]; for (int i = 0; i != num_watchdog_device; i++) { - handle[i] = i; - - EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i)); + EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i])); // since interval is big keepAlive happens only once if (i >= num_watchdog_device_half2) { @@ -366,42 +255,38 @@ TEST_F(MVNCWatchdogTests, stressWatchDog2) { EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1); } - EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000))); - } - d.wd_interval = 10; - for (int k = 0; k != num_watchdog_device; k++) { - opaque[k].actual = &mockWatchee[k]; - ctx[k].opaque = &opaque[k]; + deviceHndl[i].m_device = &mockWatchee[i]; } for (int k = 0; k != num_watchdog_device_half1; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = 0; k != num_watchdog_device_half1; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = num_watchdog_device_half1; k != num_watchdog_device_half2; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); //this might lead 
to UB, for example thread might restart but after that device get removed, so giving more time std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); } for (int k = num_watchdog_device_half2; k != num_watchdog_device; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); //this might lead to UB, for example thread might restart but after that device get removed, so giving more time //so our expectations for number of calls are not set for last third - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(3000)); } + diff --git a/inference-engine/tests_deprecated/unit/engines/vpu/myriad_tests/helpers/myriad_mvnc_stub.h b/inference-engine/tests_deprecated/unit/engines/vpu/myriad_tests/helpers/myriad_mvnc_stub.h index 8109a1bc240bba..2e146fb307f652 100644 --- a/inference-engine/tests_deprecated/unit/engines/vpu/myriad_tests/helpers/myriad_mvnc_stub.h +++ b/inference-engine/tests_deprecated/unit/engines/vpu/myriad_tests/helpers/myriad_mvnc_stub.h @@ -20,5 +20,7 @@ class MvncStub : public IMvnc { MOCK_QUALIFIED_METHOD0(AvailableDevicesNames, const, std::vector()); MOCK_QUALIFIED_METHOD0(AvailableDevicesDesc, const, std::vector()); + MOCK_METHOD0(watchdogHndl, WatchdogHndl_t*()); + ~MvncStub() = default; }; diff --git a/inference-engine/tests_deprecated/unit/inference_engine_tests/cnn_ngraph_impl_tests.cpp b/inference-engine/tests_deprecated/unit/inference_engine_tests/cnn_ngraph_impl_tests.cpp index 7fbc02cd68a742..83f961f6af75d2 100644 --- a/inference-engine/tests_deprecated/unit/inference_engine_tests/cnn_ngraph_impl_tests.cpp +++ 
b/inference-engine/tests_deprecated/unit/inference_engine_tests/cnn_ngraph_impl_tests.cpp @@ -464,6 +464,100 @@ TEST_F(CNNNGraphImplTests, ReadFromCNNNetReader) { ASSERT_EQ(2, network.layerCount()); } +TEST_F(CNNNGraphImplTests, ReadMeanImageFromCNNNetReader) { + std::string model = R"V0G0N( + + + + + + + + + + + + + + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + + +)V0G0N"; + InferenceEngine::Core core; + size_t hwSize = 22*22; + size_t dataSize = hwSize*3; + Blob::Ptr data = make_shared_blob(TensorDesc(Precision::FP32, {dataSize}, Layout::C)); + data->allocate(); + { + auto lockData = data->buffer(); + float *dataPtr = lockData.as(); + + for (size_t i = 0; i < dataSize; ++i) { + dataPtr[i] = i; + } + } + CNNNetwork network = core.ReadNetwork(model, data); + ASSERT_EQ(3, network.layerCount()); + auto inputInfo = network.getInputsInfo().begin()->second; + ASSERT_NE(inputInfo, nullptr); + auto preProc = inputInfo->getPreProcess(); + ASSERT_EQ(3, preProc.getNumberOfChannels()); + ASSERT_EQ(preProc.getMeanVariant(), MeanVariant::MEAN_IMAGE); + + for (size_t i = 0; i < preProc.getNumberOfChannels(); i++) { + auto chMeanImg = preProc[i]; + ASSERT_NE(chMeanImg, nullptr); + ASSERT_NE(chMeanImg->meanData, nullptr); + auto lockData = chMeanImg->meanData->cbuffer(); + auto *dataPtr = lockData.as(); + for (size_t j = 0; j < hwSize; j++) { + ASSERT_EQ(dataPtr[j], hwSize*i + j); + } + } +} + TEST_F(CNNNGraphImplTests, CanChangeInputPrecision) { std::shared_ptr ngraph; { diff --git a/inference-engine/thirdparty/clDNN/api/device.hpp b/inference-engine/thirdparty/clDNN/api/device.hpp index 7e49aa03ac944e..4789324f4f5e47 100644 --- a/inference-engine/thirdparty/clDNN/api/device.hpp +++ b/inference-engine/thirdparty/clDNN/api/device.hpp @@ -29,6 +29,12 @@ namespace cldnn { /// @defgroup cpp_device GPU Device /// @{ +/// @brief Enumeration of supported device types +enum class device_type { + 
integrated_gpu = 0, + discrete_gpu = 1 +}; + /// @brief Information about the device properties and capabilities. struct device_info { uint32_t cores_count; ///< Number of available HW cores. @@ -55,6 +61,8 @@ struct device_info { std::string dev_name; ///< Device ID string std::string driver_version; ///< Version of OpenCL driver + + device_type dev_type; ///< Defines type of current GPU device (integrated or discrete) }; struct device_impl; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp b/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp index d4072dc4ae9661..7285436cc62ec2 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp @@ -26,13 +26,127 @@ #include #include +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#include +#include +#else +#include +#include +#include +#include +#endif namespace cldnn { namespace gpu { +int driver_dev_id() { + const std::vector unused_ids = { + 0x4905, 0x4906, 0x4907, 0x4908 + }; + std::vector result; + +#ifdef _WIN32 + { + HDEVINFO device_info_set = SetupDiGetClassDevsA(&GUID_DEVCLASS_DISPLAY, NULL, NULL, DIGCF_PRESENT); + if (device_info_set == INVALID_HANDLE_VALUE) + return 0; + + SP_DEVINFO_DATA devinfo_data; + std::memset(&devinfo_data, 0, sizeof(devinfo_data)); + devinfo_data.cbSize = sizeof(devinfo_data); + + for (DWORD dev_idx = 0; SetupDiEnumDeviceInfo(device_info_set, dev_idx, &devinfo_data); dev_idx++) { + const size_t kBufSize = 512; + char buf[kBufSize]; + if (!SetupDiGetDeviceInstanceIdA(device_info_set, &devinfo_data, buf, kBufSize, NULL)) { + continue; + } + + char* vendor_pos = std::strstr(buf, "VEN_"); + if (vendor_pos != NULL && std::stoi(vendor_pos + 4, NULL, 16) == 0x8086) { + char* device_pos = strstr(vendor_pos, "DEV_"); + if (device_pos != NULL) { + result.push_back(std::stoi(device_pos + 4, NULL, 16)); + } + } + } + + if (device_info_set) { + 
SetupDiDestroyDeviceInfoList(device_info_set); + } + } +#elif defined(__linux__) + { + std::string dev_base{ "/sys/devices/pci0000:00/0000:00:02.0/" }; + std::ifstream ifs(dev_base + "vendor"); + if (ifs.good()) { + int ven_id; + ifs >> std::hex >> ven_id; + ifs.close(); + if (ven_id == 0x8086) { + ifs.open(dev_base + "device"); + if (ifs.good()) { + int res = 0; + ifs >> std::hex >> res; + result.push_back(res); + } + } + } + } +#endif + + auto id_itr = result.begin(); + while (id_itr != result.end()) { + if (std::find(unused_ids.begin(), unused_ids.end(), *id_itr) != unused_ids.end()) + id_itr = result.erase(id_itr); + else + id_itr++; + } + + if (result.empty()) + return 0; + else + return result.back(); +} + +static device_type get_device_type(const cl::Device& device) { + auto unified_mem = device.getInfo(); + + return unified_mem ? device_type::integrated_gpu : device_type::discrete_gpu; +} + +static bool get_imad_support(const cl::Device& device) { + std::string dev_name = device.getInfo(); + + if (dev_name.find("Gen12") != std::string::npos || + dev_name.find("Xe") != std::string::npos) + return true; + + if (get_device_type(device) == device_type::integrated_gpu) { + const std::vector imad_ids = { + 0x9A40, 0x9A49, 0x9A59, 0x9AD9, + 0x9A60, 0x9A68, 0x9A70, 0x9A78, + 0x9A7F, 0x9AF8, 0x9AC0, 0x9AC9 + }; + int dev_id = driver_dev_id(); + if (dev_id == 0) + return false; + + if (std::find(imad_ids.begin(), imad_ids.end(), dev_id) != imad_ids.end()) + return true; + } else { + return true; + } + + return false; +} device_info_internal::device_info_internal(const cl::Device& device) { dev_name = device.getInfo(); driver_version = device.getInfo(); + dev_type = get_device_type(device); compute_units_count = device.getInfo(); @@ -61,10 +175,9 @@ device_info_internal::device_info_internal(const cl::Device& device) { supports_subgroups_short = extensions.find("cl_intel_subgroups_short") != std::string::npos; - supports_imad = true; + supports_imad = 
get_imad_support(device); supports_immad = false; - dev_type = static_cast(device.getInfo()); vendor_id = static_cast(device.getInfo()); supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/device_info.h b/inference-engine/thirdparty/clDNN/src/gpu/device_info.h index 4ca0aaa2ec4f32..af49a4f8cca4c1 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/device_info.h +++ b/inference-engine/thirdparty/clDNN/src/gpu/device_info.h @@ -26,7 +26,6 @@ namespace gpu { struct device_info_internal : cldnn::device_info { std::uint32_t compute_units_count; - uint32_t dev_type; uint32_t vendor_id; uint8_t supports_usm; @@ -49,7 +48,8 @@ struct device_info_internal : cldnn::device_info { supports_immad, supports_usm, dev_name, - driver_version + driver_version, + dev_type }; } }; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp index 98228ace61197d..bb5227c3980326 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp @@ -38,16 +38,31 @@ static constexpr auto INTEL_PLATFORM_VENDOR = "Intel(R) Corporation"; std::map ocl_builder::get_available_devices(void* user_context, void* user_device) const { bool host_out_of_order = true; // Change to false, if debug requires in-order queue. 
+ std::vector dev_orig, dev_sorted; if (user_context != nullptr) { - return build_device_list_from_user_context(host_out_of_order, user_context); + dev_orig = build_device_list_from_user_context(host_out_of_order, user_context); } else if (user_device != nullptr) { - return build_device_list_from_user_device(host_out_of_order, user_device); + dev_orig = build_device_list_from_user_device(host_out_of_order, user_device); } else { - return build_device_list(host_out_of_order); + dev_orig = build_device_list(host_out_of_order); } + + std::map ret; + for (auto& dptr : dev_orig) { + auto flag = dptr->get_device().getInfo(); + if (flag != 0) + dev_sorted.insert(dev_sorted.begin(), dptr); + else + dev_sorted.push_back(dptr); + } + uint32_t idx = 0; + for (auto& dptr : dev_sorted) { + ret[std::to_string(idx++)] = dptr; + } + return ret; } -std::map ocl_builder::build_device_list(bool out_out_order) const { +std::vector ocl_builder::build_device_list(bool out_out_order) const { cl_uint n = 0; // Get number of platforms availible cl_int err = clGetPlatformIDs(0, NULL, &n); @@ -62,8 +77,7 @@ std::map ocl_builder::build_device_list(bool out_ throw std::runtime_error("[CLDNN ERROR]. 
clGetPlatformIDs error " + std::to_string(err)); } - uint32_t idx = 0; - std::map ret; + std::vector ret; for (auto& id : platform_ids) { cl::Platform platform = cl::Platform(id); @@ -74,7 +88,8 @@ std::map ocl_builder::build_device_list(bool out_ platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); for (auto& device : devices) { if (!does_device_match_config(out_out_order, device)) continue; - ret.insert(get_device(idx++, device, id)); + ret.emplace_back(device_impl::ptr{ new device_impl(device, cl::Context(device), + id, device_info_internal(device)), false}); } } if (ret.empty()) { @@ -83,15 +98,16 @@ std::map ocl_builder::build_device_list(bool out_ return ret; } -std::map ocl_builder::build_device_list_from_user_context(bool out_out_order, void* user_context) const { +std::vector ocl_builder::build_device_list_from_user_context(bool out_out_order, void* user_context) const { cl::Context ctx = cl::Context(static_cast(user_context), true); auto all_devices = ctx.getInfo(); - std::map ret; - uint32_t idx = 0; + std::vector ret; for (auto& device : all_devices) { if (!does_device_match_config(out_out_order, device)) continue; - ret.insert(get_device(idx++, device, device.getInfo())); + ret.emplace_back(device_impl::ptr{ new device_impl(device, cl::Context(device), + device.getInfo(), + device_info_internal(device)), false}); } if (ret.empty()) { @@ -100,7 +116,7 @@ std::map ocl_builder::build_device_list_from_use return ret; } -std::map ocl_builder::build_device_list_from_user_device(bool out_out_order, void* user_device) const { +std::vector ocl_builder::build_device_list_from_user_device(bool out_out_order, void* user_device) const { cl_uint n = 0; // Get number of platforms availible cl_int err = clGetPlatformIDs(0, NULL, &n); @@ -115,8 +131,7 @@ std::map ocl_builder::build_device_list_from_use throw std::runtime_error("[CLDNN ERROR]. 
clGetPlatformIDs error " + std::to_string(err)); } - uint32_t idx = 0; - std::map ret; + std::vector ret; for (auto& id : platform_ids) { cl::PlatformVA platform = cl::PlatformVA(id); @@ -137,7 +152,18 @@ std::map ocl_builder::build_device_list_from_use for (auto& device : devices) { if (!does_device_match_config(out_out_order, device)) continue; - ret.insert(get_device_shared(idx++, device, id, user_device)); + cl_context_properties props[] = { + #ifdef WIN32 + CL_CONTEXT_D3D11_DEVICE_KHR, + #else + CL_CONTEXT_VA_API_DISPLAY_INTEL, + #endif + (intptr_t)user_device, + CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, + CL_CONTEXT_PLATFORM, (cl_context_properties)id, + 0 }; + ret.emplace_back(device_impl::ptr{ new device_impl(device, cl::Context(device, props), + id, device_info_internal(device)), false }); } } if (ret.empty()) { @@ -146,38 +172,6 @@ std::map ocl_builder::build_device_list_from_use return ret; } -std::pair ocl_builder::get_device(const uint32_t index, - const cl::Device& dev_to_add, - const cl_platform_id platform) const { - return { - std::to_string(index), - device_impl::ptr{ new device_impl(dev_to_add, cl::Context(dev_to_add), platform, device_info_internal(dev_to_add)), - false} - }; -} - -std::pair ocl_builder::get_device_shared(const uint32_t index, - const cl::Device& dev_to_add, - const cl_platform_id platform, - void* user_device) const { - cl_context_properties props[] = { -#ifdef WIN32 - CL_CONTEXT_D3D11_DEVICE_KHR, -#else - CL_CONTEXT_VA_API_DISPLAY_INTEL, -#endif - (intptr_t)user_device, - CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, - CL_CONTEXT_PLATFORM, (cl_context_properties)platform, - 0 }; - - return { - std::to_string(index), - device_impl::ptr{ new device_impl(dev_to_add, cl::Context(dev_to_add, props), platform, device_info_internal(dev_to_add)), - false } - }; -} - bool ocl_builder::does_device_match_config(bool out_of_order, const cl::Device& device) const { // Is it intel gpu if (device.getInfo() != device_type || diff --git 
a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h index c17f69c653222c..d013d0e78563d7 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h @@ -43,14 +43,10 @@ class ocl_builder { uint32_t get_device_type() const { return device_type; } uint32_t get_device_vendor() const { return device_vendor; } private: - std::pair get_device(const uint32_t index, - const cl::Device& dev_to_add, const cl_platform_id platform) const; - std::pair get_device_shared(const uint32_t index, - const cl::Device& dev_to_add, const cl_platform_id platform, void* user_device) const; bool does_device_match_config(bool out_of_order, const cl::Device& device) const; - std::map build_device_list(bool out_out_order) const; - std::map build_device_list_from_user_context(bool out_out_order, void* user_context) const; - std::map build_device_list_from_user_device(bool out_out_order, void* user_device) const; + std::vector build_device_list(bool out_out_order) const; + std::vector build_device_list_from_user_context(bool out_out_order, void* user_context) const; + std::vector build_device_list_from_user_device(bool out_out_order, void* user_device) const; }; } // namespace gpu diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp index 5b035577a9b9a7..e6126c8f924a6a 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp @@ -115,7 +115,6 @@ gpu_toolkit::gpu_toolkit(const device_impl& device_impl, const configuration& co << " profiling: " << std::boolalpha << _configuration.enable_profiling << "\n" << " meaningful names: " << std::boolalpha << _configuration.meaningful_kernels_names << "\n" << " dump custom program: " << std::boolalpha << _configuration.dump_custom_program << "\n" - << " device type: " << 
std::to_string(device_info.dev_type) << "\n" << " vendor type: " << std::hex << std::setfill('0') << std::setw(4) << std::right << std::to_string(device_info.vendor_id) << "\n" << std::dec << std::setfill(' ') << std::right diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp index 397d65cf11d44b..544bc72af0c193 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp @@ -249,6 +249,10 @@ void prepare_buffer_fusing::run(program_impl& p) { if (usr_layout.format == format::b_fs_yx_fsv16 && (opt_lower_pad % 16 != 0 || opt_upper_pad % 16 != 0)) return; + if (input_layout.data_padding.lower_size().batch[0] != 0 || input_layout.data_padding.upper_size().batch[0] != 0 || + input_layout.data_padding.lower_size().spatial[0] != 0 || input_layout.data_padding.upper_size().spatial[0] != 0 || + input_layout.data_padding.lower_size().spatial[1] != 0 || input_layout.data_padding.upper_size().spatial[1] != 0) + return; } if (format == format::bfyx && crop_size.batch[0] == input_layout.size.batch[0] && diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp index 0005da4aae99fb..64befaa5f32e88 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp @@ -20,6 +20,7 @@ #include #include "api/crop.hpp" #include +#include #include #include #include @@ -587,6 +588,46 @@ TEST(crop_gpu, basic_in1x4x1x1_split) { EXPECT_EQ(output_ptr_2[i], out2[i]); } +TEST(crop_gpu, basic_in1x4x1x1_crop_pad) { + const auto& engine = get_test_engine(); + + auto batch_num = 1; + auto feature_num = 4; + auto x_size = 1; + auto y_size = 1; + + auto crop_batch_num = 1; + auto 
crop_feature_num_1 = 3; + auto crop_x_size = 1; + auto crop_y_size = 1; + auto feature_offset_1 = 0; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + + padding in_pad({0, 0, 1, 1}, {0, 0, 1, 1}); + auto padded_layout = input.get_layout().with_padding(in_pad); + topology topology; + topology.add(input_layout("input", input.get_layout())); + topology.add(reorder("input_reorder", "input", padded_layout)); + topology.add(crop("crop1", "input_reorder", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) })); + topology.add(reorder("out_reorder", "crop1", format::bfyx, data_types::f32)); + + std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; + std::vector out1 = { -1.f, 2.f,-3.f }; + set_values(input, input_vec); + build_options bo; + bo.set_option(build_option::optimize_data(true)); + + network network(engine, topology, bo); + network.set_input_data("input", input); + auto outputs = network.execute(); + + auto output = outputs.at("out_reorder").get_memory(); + auto output_ptr = output.pointer(); + + for (size_t i = 0; i < out1.size();i++) + EXPECT_EQ(output_ptr[i], out1[i]); +} + TEST(crop_gpu, basic_int_in1x4x1x1_split) { // Tests split with crop implementation // _CROP_1(1x3x1x1,offset(0x0x0x0)) diff --git a/inference-engine/thirdparty/mkl-dnn/include/mkldnn.h b/inference-engine/thirdparty/mkl-dnn/include/mkldnn.h index 4f030a2aab74da..fa176404540327 100644 --- a/inference-engine/thirdparty/mkl-dnn/include/mkldnn.h +++ b/inference-engine/thirdparty/mkl-dnn/include/mkldnn.h @@ -545,18 +545,9 @@ mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_binarization( */ mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_quantization( mkldnn_post_ops_t post_ops, mkldnn_alg_kind_t alg, - int crop_low_count, const float* crop_low, int crop_high_count, const float* crop_high, 
- int input_scale_count, const float* input_scale, int input_shift_count, const float* input_shift, - int output_scale_count, const float* output_scale, int output_shift_count, const float* output_shift); - -/** Gets the quantization parameters of the post operation with index @p index in - * the sequence of @p post_ops. - */ -mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_quantization( - const_mkldnn_post_ops_t post_ops, int index, mkldnn_alg_kind_t *alg, - int* crop_low_count, const float** crop_low, int* crop_high_count, const float** crop_high, - int* input_scale_count, const float** input_scale, int* input_shift_count, const float** input_shift, - int* output_scale_count, const float** output_scale, int* output_shift_count, const float** output_shift); + const void* crop_low, const void* crop_high, + const void* input_scale, const void* input_shift, + const void* output_scale, const void* output_shift); /** @} */ diff --git a/inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp b/inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp index 7a678ce1c529a2..836eabcc2b7d31 100644 --- a/inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp +++ b/inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp @@ -473,48 +473,13 @@ struct post_ops: public handle { } void append_quantization(algorithm alg, - const std::vector &crop_low, const std::vector &crop_high, - const std::vector &input_scale, const std::vector &input_shift, - const std::vector &output_scale, const std::vector &output_shift) { - - error::wrap_c_api(mkldnn_post_ops_append_quantization(get(), convert_to_c(alg), crop_low.size(), &crop_low[0], crop_high.size(), &crop_high[0], - input_scale.size(), &input_scale[0], input_shift.size(), &input_shift[0], output_scale.size(), &output_scale[0], output_shift.size(), &output_shift[0]), + const void* crop_low, const void* crop_high, + const void* input_scale, const void* input_shift, + const void* output_scale, const void* output_shift) { + 
error::wrap_c_api(mkldnn_post_ops_append_quantization(get(), convert_to_c(alg), crop_low, crop_high, + input_scale, input_shift, output_scale, output_shift), "could not append quantization"); } - - void get_params_quantization(int index, algorithm &alg, - std::vector &crop_low, std::vector &crop_high, - std::vector &input_scale, std::vector &input_shift, - std::vector &output_scale, std::vector &output_shift) const { - mkldnn_alg_kind_t c_alg; - int crop_low_count, crop_high_count, input_scale_count, input_shift_count, output_scale_count, output_shift_count; - const float *crop_low_data, *crop_high_data, *input_scale_data, *input_shift_data, *output_scale_data, *output_shift_data; - - error::wrap_c_api(mkldnn_post_ops_get_params_quantization(get(), index, &c_alg, - &crop_low_count, &crop_low_data, &crop_high_count, &crop_high_data, - &input_scale_count, &input_scale_data, &input_shift_count, &input_shift_data, - &output_scale_count, &output_scale_data, &output_shift_count, &output_shift_data), - "could not get int weights zero_points"); - - crop_low.resize(crop_low_count); - for (int c = 0; c < crop_low_count; ++c) - crop_low[c] = crop_low_data[c]; - crop_high.resize(crop_high_count); - for (int c = 0; c < crop_high_count; ++c) - crop_high[c] = crop_high_data[c]; - input_scale.resize(input_scale_count); - for (int c = 0; c < input_scale_count; ++c) - input_scale[c] = input_scale_data[c]; - input_shift.resize(input_shift_count); - for (int c = 0; c < input_shift_count; ++c) - input_shift[c] = input_shift_data[c]; - output_scale.resize(output_scale_count); - for (int c = 0; c < output_scale_count; ++c) - output_scale[c] = output_scale_data[c]; - output_shift.resize(output_shift_count); - for (int c = 0; c < output_shift_count; ++c) - output_shift[c] = output_shift_data[c]; - } }; #ifndef DOXYGEN_SHOULD_SKIP_THIS diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.cpp index 
e251a97d6ffdfc..1d0a533ad05674 100644 --- a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.cpp +++ b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.cpp @@ -174,9 +174,9 @@ status_t post_ops_t::append_binarization(alg_kind_t alg, const float* thresholds } status_t post_ops_t::append_quantization(alg_kind_t alg, - int crop_low_count, const float* crop_low, int crop_high_count, const float* crop_high, - int input_scale_count, const float* input_scale, int input_shift_count, const float* input_shift, - int output_scale_count, const float* output_scale, int output_shift_count, const float* output_shift) { + const void* crop_low, const void* crop_high, + const void* input_scale, const void* input_shift, + const void* output_scale, const void* output_shift) { using namespace mkldnn::impl::alg_kind; bool known_alg = one_of(alg, quantization_quantize_dequantize, quantization_quantize); if (!known_alg) @@ -185,24 +185,14 @@ status_t post_ops_t::append_quantization(alg_kind_t alg, if (len_ == capacity) return out_of_memory; - bool ok = crop_low_count > 0 && crop_high_count > 0 && input_scale_count > 0 && input_shift_count > 0 && output_scale_count > 0 && output_shift_count > 0; - if (!ok) - return invalid_arguments; - entry_[len_].kind = primitive_kind::quantization; entry_[len_].quantization.alg = alg; - entry_[len_].quantization.crop_low_data = new shifts_t(); - entry_[len_].quantization.crop_low_data->set(crop_low_count, 1 << 1, crop_low); - entry_[len_].quantization.crop_high_data = new shifts_t(); - entry_[len_].quantization.crop_high_data->set(crop_high_count, 1 << 1, crop_high); - entry_[len_].quantization.input_scale_data = new scales_t(); - entry_[len_].quantization.input_scale_data->set(input_scale_count, 1 << 1, input_scale); - entry_[len_].quantization.input_shift_data = new shifts_t(); - entry_[len_].quantization.input_shift_data->set(input_shift_count, 1 << 1, input_shift); - entry_[len_].quantization.output_scale_data = new 
scales_t(); - entry_[len_].quantization.output_scale_data->set(output_scale_count, 1 << 1, output_scale); - entry_[len_].quantization.output_shift_data = new shifts_t(); - entry_[len_].quantization.output_shift_data->set(output_shift_count, 1 << 1, output_shift); + entry_[len_].quantization.crop_low_data = reinterpret_cast*>(crop_low); + entry_[len_].quantization.crop_high_data = reinterpret_cast*>(crop_high); + entry_[len_].quantization.input_scale_data = reinterpret_cast(input_scale); + entry_[len_].quantization.input_shift_data = reinterpret_cast*>(input_shift); + entry_[len_].quantization.output_scale_data = reinterpret_cast(output_scale); + entry_[len_].quantization.output_shift_data = reinterpret_cast*>(output_shift); len_++; @@ -559,43 +549,13 @@ status_t mkldnn_post_ops_get_params_binarization(const post_ops_t *post_ops, int } status_t mkldnn_post_ops_append_quantization(post_ops_t *post_ops, alg_kind_t kind, - int crop_low_count, const float* crop_low, int crop_high_count, const float* crop_high, - int input_scale_count, const float* input_scale, int input_shift_count, const float* input_shift, - int output_scale_count, const float* output_scale, int output_shift_count, const float* output_shift) { + const void* crop_low, const void* crop_high, + const void* input_scale, const void* input_shift, + const void* output_scale, const void* output_shift) { if (post_ops == nullptr) return invalid_arguments; - return post_ops->append_quantization(kind, crop_low_count, crop_low, crop_high_count, crop_high, - input_scale_count, input_scale, input_shift_count, input_shift, output_scale_count, output_scale, output_shift_count, output_shift); -} - -status_t mkldnn_post_ops_get_params_quantization(const post_ops_t *post_ops, int index, alg_kind_t* alg, - int* crop_low_count, const float** crop_low, int* crop_high_count, const float** crop_high, - int* input_scale_count, const float** input_scale, int* input_shift_count, const float** input_shift, - int* 
output_scale_count, const float** output_scale, int* output_shift_count, const float** output_shift) { - bool ok = true - && simple_get_params_check(post_ops, index, primitive_kind::quantization) - && !any_null(alg, crop_low_count, crop_low, crop_high_count, crop_high, - input_scale_count, input_scale, input_shift_count, input_shift, output_scale_count, output_scale, output_shift_count, output_shift); - if (!ok) - return invalid_arguments; - - const auto &e = post_ops->entry_[index].quantization; - *alg = e.alg; - *crop_low_count = e.crop_low_data->count_; - *crop_high_count = e.crop_high_data->count_; - *input_scale_count = e.input_scale_data->count_; - *input_shift_count = e.input_shift_data->count_; - *output_scale_count = e.output_scale_data->count_; - *output_shift_count = e.output_shift_data->count_; - *crop_low = e.crop_low_data->shifts_; - *crop_high = e.crop_high_data->shifts_; - *input_scale = e.input_scale_data->scales_; - *input_shift = e.input_shift_data->shifts_; - *output_scale = e.output_scale_data->scales_; - *output_shift = e.output_shift_data->shifts_; - - return success; + return post_ops->append_quantization(kind, crop_low, crop_high, input_scale, input_shift, output_scale, output_shift); } template struct mkldnn::impl::shifts_t; diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.hpp index 206765949b7f88..10cac671959fc5 100644 --- a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.hpp +++ b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.hpp @@ -174,12 +174,12 @@ struct mkldnn_post_ops: public mkldnn::impl::c_compatible { } binarization; struct { mkldnn::impl::alg_kind_t alg; - mkldnn::impl::shifts_t* crop_low_data; - mkldnn::impl::shifts_t* crop_high_data; - mkldnn::impl::scales_t* input_scale_data; - mkldnn::impl::shifts_t* input_shift_data; - mkldnn::impl::scales_t* output_scale_data; - mkldnn::impl::shifts_t* 
output_shift_data; + const mkldnn::impl::shifts_t* crop_low_data; + const mkldnn::impl::shifts_t* crop_high_data; + const mkldnn::impl::scales_t* input_scale_data; + const mkldnn::impl::shifts_t* input_shift_data; + const mkldnn::impl::scales_t* output_scale_data; + const mkldnn::impl::shifts_t* output_shift_data; } quantization; }; @@ -224,20 +224,6 @@ struct mkldnn_post_ops: public mkldnn::impl::c_compatible { mkldnn_post_ops(): len_(0) {} -// ~mkldnn_post_ops() { -// for (int i = 0; i < len_; i++) { -// auto &post_op = entry_[i]; -// if (post_op.is_quantization()) { -// delete post_op.quantization.crop_low_data; -// delete post_op.quantization.crop_high_data; -// delete post_op.quantization.input_scale_data; -// delete post_op.quantization.input_shift_data; -// delete post_op.quantization.output_scale_data; -// delete post_op.quantization.output_shift_data; -// } -// } -// } - mkldnn::impl::status_t append_sum(float scale, mkldnn::impl::data_type_t data_type); mkldnn::impl::status_t append_eltwise(float scale, mkldnn::impl::alg_kind_t alg, float alpha, float beta); @@ -250,9 +236,9 @@ struct mkldnn_post_ops: public mkldnn::impl::c_compatible { mkldnn::impl::status_t append_binarization(mkldnn::impl::alg_kind_t alg, const float* weights_data, const float* output_mask_data); mkldnn::impl::status_t append_quantization(mkldnn::impl::alg_kind_t alg, - int crop_low_count, const float* crop_low, int crop_high_count, const float* crop_high, - int input_scale_count, const float* input_scale, int input_shift_count, const float* input_shift, - int output_scale_count, const float* output_scale, int output_shift_count, const float* output_shif); + const void* crop_low, const void* crop_high, + const void* input_scale, const void* input_shift, + const void* output_scale, const void* output_shift); int find(mkldnn::impl::primitive_kind_t kind, int start = 0, int stop = -1) const { diff --git a/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt 
b/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt index a0347ba7e1a716..580c8faf9c8a38 100644 --- a/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt +++ b/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt @@ -16,10 +16,10 @@ add_library(${TARGET_NAME} STATIC ${MVNC_SOURCES}) target_include_directories(${TARGET_NAME} PUBLIC "include" + ${WATCHDOG_INCLUDE} PRIVATE ${XLINK_INCLUDE} - ${XLINK_PLATFORM_INCLUDE} - ${WATCHDOG_INCLUDE}) + ${XLINK_PLATFORM_INCLUDE}) target_compile_definitions(${TARGET_NAME} PRIVATE diff --git a/inference-engine/thirdparty/movidius/mvnc/include/mvnc.h b/inference-engine/thirdparty/movidius/mvnc/include/mvnc.h index cb791eecb01e87..a3f6d2d3f11b10 100644 --- a/inference-engine/thirdparty/movidius/mvnc/include/mvnc.h +++ b/inference-engine/thirdparty/movidius/mvnc/include/mvnc.h @@ -10,6 +10,8 @@ extern "C" { #endif +#include "watchdog/watchdog.h" + #define NC_THERMAL_BUFFER_SIZE 100 #define NC_DEBUG_BUFFER_SIZE 120 #define NC_MAX_DEVICES (32) @@ -159,6 +161,12 @@ struct ncDeviceDescr_t { char name[NC_MAX_NAME_SIZE]; }; +typedef struct ncDeviceOpenParams { + WatchdogHndl_t* watchdogHndl; + int watchdogInterval; + const char* customFirmwareDirectory; +} ncDeviceOpenParams_t; + typedef enum { NC_FIFO_HOST_RO = 0, // fifo can be read through the API but can not be // written ( graphs can read and write data ) @@ -201,7 +209,7 @@ MVNC_EXPORT_API ncStatus_t ncSetDeviceConnectTimeout(int deviceConnectTimeoutSec * If NULL or empty, default path searching behavior will be used. 
*/ MVNC_EXPORT_API ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, - struct ncDeviceDescr_t in_ncDeviceDesc, int watchdogInterval, const char* customFirmwareDirectory); + struct ncDeviceDescr_t in_ncDeviceDesc, ncDeviceOpenParams_t deviceOpenParams); /** * @brief Returns a description of all available devices in the system @@ -215,7 +223,7 @@ MVNC_EXPORT_API ncStatus_t ncAvailableDevices(struct ncDeviceDescr_t *deviceDesc /** * @brief Close device and destroy handler */ -MVNC_EXPORT_API ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandle); +MVNC_EXPORT_API ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandle, WatchdogHndl_t* watchdogHndl); // Graph MVNC_EXPORT_API ncStatus_t ncGraphCreate(const char* name, struct ncGraphHandle_t **graphHandle); diff --git a/inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h b/inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h index ef9ce2ee621b96..e539788e6760ab 100644 --- a/inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h +++ b/inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h @@ -54,7 +54,7 @@ struct _devicePrivate_t { deviceCapabilities_t dev_attr; ncDeviceState_t state; uint32_t device_id; - wd_context watchdog_ctx; + WdDeviceHndl_t* watchdog_device; int wd_interval; }; diff --git a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdog.h b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdog.h index 596d43f01e6d7a..09c3d7dde80fb1 100644 --- a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdog.h +++ b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdog.h @@ -5,48 +5,42 @@ #ifndef MVNC_WATCHDOG_H #define MVNC_WATCHDOG_H -#include #ifdef __cplusplus -# define WD_API extern "C" -# else -# define WD_API +extern "C" +{ #endif -/** -* @brief default ping interval is 1 second -*/ -#define WATCHDOG_PING_INTERVAL_MS 1000 +typedef struct _WatchdogHndl_t WatchdogHndl_t; 
-typedef struct wd_context_tag { - void * opaque; -} wd_context; +typedef struct _WdDeviceHndl_t { + void* m_device; +} WdDeviceHndl_t; typedef enum { WD_ERRNO = 0, WD_NOTINITIALIZED, - WD_DUPLICATE, WD_FAIL } wd_error_t; -/** - * @brief initializes watchdog context, required to be called before any other WD API calls - * @return - */ -WD_API wd_error_t watchdog_init_context(wd_context *ctx); +wd_error_t watchdog_create(WatchdogHndl_t** out_watchdogHndl); +void watchdog_destroy(WatchdogHndl_t* watchdogHndl); /** * @brief Creates watchdog thread, if not created, and registers new watchee device, and initialise opaque handle to it. * To avoid a memory leak, the registered device must be unregister with watchdog_unregister_device(). - * @param d - newly connected device descriptor + * @param deviceHandle - newly connected device descriptor * @return */ -WD_API wd_error_t watchdog_register_device(wd_context *ctx, devicePrivate_t *d); +wd_error_t watchdog_register_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle); /** * @brief remove watch_dog device from the list, and might stop watchdog worker thread * @return result of operation */ -WD_API wd_error_t watchdog_unregister_device(wd_context *ctx); +wd_error_t watchdog_unregister_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle); +#ifdef __cplusplus +} +#endif #endif // MVNC_WATCHDOG_H diff --git a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp index 0c5e91be5ad10b..99b516fc477968 100644 --- a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp +++ b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp @@ -5,6 +5,17 @@ #pragma once #include +#include +#include +#include +#include + +#define MVLOG_UNIT_NAME watchdog +#include "XLinkLog.h" + +#if defined(_WIN32) +#include "win_synchapi.h" +#endif // defined(_WIN32) namespace 
Watchdog { @@ -12,15 +23,11 @@ namespace Watchdog { * @brief represents watchdog device interface to be registered within watchdog worker */ class IDevice { - public: +public: using time_point = std::chrono::steady_clock::time_point; virtual ~IDevice() = default; - /** - * @brief depending on implementation watchdog device shouldn't have interval longer than that - */ - virtual void setInterval(const std::chrono::milliseconds msInterval) noexcept = 0; /** * @brief watchdog request device to keep alive with current timestamp */ @@ -39,4 +46,47 @@ class IDevice { virtual void *getHandle() const noexcept = 0; }; +class AutoScope { +public: + explicit AutoScope(const std::function& func) : _func(func) {} + ~AutoScope() { _func(); } + + AutoScope(const AutoScope&) = delete; + AutoScope(AutoScope&&) = delete; + AutoScope& operator=(const AutoScope&) = delete; + AutoScope& operator=(AutoScope&&) = delete; +private: + std::function _func; +}; + +class CustomUniqueLock { +public: + explicit CustomUniqueLock(pthread_mutex_t* mutex) + :m_mutex(mutex) { + if(m_mutex == nullptr) { + throw std::runtime_error("mutex should not be null"); + } + + int rc = pthread_mutex_lock(m_mutex); + if (rc != 0) { + throw std::runtime_error(std::string("failed to lock mutex. rc: ") + strerror(rc)); + } + }; + + ~CustomUniqueLock() { + int rc = pthread_mutex_unlock(m_mutex); + if (rc != 0) { + mvLog(MVLOG_ERROR, "failed to unlock mutex. 
rc: %s", strerror(rc)); + } + } + + CustomUniqueLock(const CustomUniqueLock&) = delete; + CustomUniqueLock(const CustomUniqueLock&&) = delete; + CustomUniqueLock& operator=(const CustomUniqueLock&) = delete; + CustomUniqueLock& operator=(const CustomUniqueLock&&) = delete; + +private: + pthread_mutex_t* m_mutex = nullptr; +}; + } // namespace Watchdog diff --git a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/xlink_device.h b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/xlink_device.h new file mode 100644 index 00000000000000..bff0b59b4a083b --- /dev/null +++ b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/xlink_device.h @@ -0,0 +1,25 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifndef MVNC_XLINK_DEVICE_H +#define MVNC_XLINK_DEVICE_H + +#include "mvnc.h" +#include "watchdog.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define WATCHDOG_MAX_PING_INTERVAL_MS 1000 + +wd_error_t xlink_device_create(WdDeviceHndl_t** out_deviceHandle, devicePrivate_t* pDevice); +void xlink_device_destroy(WdDeviceHndl_t* deviceHandle); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c b/inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c index 4001024dea5dad..39007833881d4f 100644 --- a/inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c +++ b/inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c @@ -36,6 +36,7 @@ #include "XLinkMacros.h" #include "XLinkStringUtils.h" #include "watchdog.h" +#include "xlink_device.h" #define THERMAL_BUFFER_SIZE 100 #define THERMAL_THROTTLING_BUFFER_SIZE (THERMAL_BUFFER_SIZE + sizeof(int)) @@ -660,7 +661,7 @@ ncStatus_t ncSetDeviceConnectTimeout(int deviceConnectTimeoutSec) { } ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, - struct ncDeviceDescr_t in_ncDeviceDesc, int watchdogInterval, const char* customFirmwareDirectory) { + struct ncDeviceDescr_t in_ncDeviceDesc, 
ncDeviceOpenParams_t deviceOpenParams) { //---------------------------------------------------------- // Check input @@ -669,7 +670,11 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, deviceDesc_t in_deviceDesc = {0}; copyNcDeviceDescrToXLink(&in_ncDeviceDesc, &in_deviceDesc); + int watchdogInterval = deviceOpenParams.watchdogInterval; + const char* customFirmwareDirectory = deviceOpenParams.customFirmwareDirectory; + CHECK_HANDLE_CORRECT_RC(deviceHandlePtr, NC_INVALID_PARAMETERS); + CHECK_HANDLE_CORRECT_RC(deviceOpenParams.watchdogHndl, NC_INVALID_PARAMETERS); if (watchdogInterval < 0) { mvLog(MVLOG_ERROR, "Invalid watchdogInterval"); return NC_INVALID_PARAMETERS; @@ -1094,8 +1099,12 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, d->device_mon_stream_id = deviceMonitorStreamId; #if !(defined(NO_BOOT)) - watchdog_init_context(&d->watchdog_ctx); - watchdog_register_device(&d->watchdog_ctx, d); + wd_error_t wd_rc = xlink_device_create(&d->watchdog_device, d); + if (wd_rc) { + mvLog(MVLOG_WARN, "watchdog is not started for device %p", d->xlink); + } else { + watchdog_register_device(deviceOpenParams.watchdogHndl, d->watchdog_device); + } #endif getDevAttributes(d); @@ -1110,7 +1119,10 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, CHECK_STREAM_ID(graphMonitorStreamId, { printfOverXLinkClose(d); // TODO NO_BOOT case - watchdog_unregister_device(&d->watchdog_ctx); + if (d->watchdog_device != NULL) { + watchdog_unregister_device(deviceOpenParams.watchdogHndl, d->watchdog_device); + xlink_device_destroy(d->watchdog_device); + } CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m)); CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m)); CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m)); @@ -1124,7 +1136,10 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, #else CHECK_STREAM_ID(graphMonitorStreamId, { // TODO NO_BOOT case - 
watchdog_unregister_device(&d->watchdog_ctx); + if (d->watchdog_device != NULL) { + watchdog_unregister_device(deviceOpenParams.watchdogHndl, d->watchdog_device); + xlink_device_destroy(d->watchdog_device); + } CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m)); CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m)); CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m)); @@ -1654,7 +1669,7 @@ static ncStatus_t destroyDeviceHandle(struct ncDeviceHandle_t **deviceHandlePtr) } -ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) { +ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr, WatchdogHndl_t* watchdogHndl) { int found = 0; XLinkError_t rc = X_LINK_SUCCESS; @@ -1732,7 +1747,10 @@ ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) { #endif #if !defined(NO_BOOT) - watchdog_unregister_device(&d->watchdog_ctx); + if (d->watchdog_device != NULL) { + watchdog_unregister_device(watchdogHndl, d->watchdog_device); + xlink_device_destroy(d->watchdog_device); + } #endif // Save all devices before reset diff --git a/inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp b/inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp index 35e0316b2a23f8..444d1c0e2480af 100644 --- a/inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp +++ b/inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp @@ -2,32 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "watchdog.h" +#include "watchdogPrivate.hpp" + #include -#include #include #include #include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include #define MVLOG_UNIT_NAME watchdog #include "XLinkLog.h" -#include "XLink.h" -#include "XLinkPrivateDefines.h" -#include "XLinkErrorUtils.h" - -#if defined(_WIN32) -#include "win_synchapi.h" -#endif // defined(_WIN32) namespace { @@ -36,555 +24,405 @@ using namespace chrono; 
using namespace Watchdog; /** - * @brief implementation of watchdog device using xlink representation of it + * @brief when device just added into watchdog, it should not be due interval at all */ -class XLinkDevice : public IDevice { - _devicePrivate_t privateDevice; - using time_point = std::chrono::steady_clock::time_point; - time_point lastPongTime = time_point::min(); - time_point lastPingTime = time_point::min(); - enum : int { deviceHangTimeout = 12000}; - +class NoDueOnFirstCall : public IDevice { public: - explicit XLinkDevice(devicePrivate_t *pDevice) - : privateDevice(*pDevice) { - setInterval(milliseconds(privateDevice.wd_interval)); - } + NoDueOnFirstCall(IDevice* original) : m_originalPtr(original) {} - void setInterval(const std::chrono::milliseconds msInterval) noexcept override { - privateDevice.wd_interval = std::max(static_cast(msInterval.count()), WATCHDOG_PING_INTERVAL_MS); + void keepAlive(const time_point& current_time) noexcept override { + m_originalPtr->keepAlive(current_time); + m_firstCall = true; } - void keepAlive(const time_point &current_time) noexcept override { - bool bPong = sendPingMessage(); - // we consider that as first pong time even if it wasn't happen as beginning of boot - if (lastPongTime == time_point::min()) { - lastPongTime = current_time; - } - - lastPingTime = current_time; - - int diff = duration_cast(current_time - lastPongTime).count(); - - if (bPong) { - lastPongTime = current_time; - mvLog(MVLOG_INFO, "[%p] device, ping succeed after %d ms\n", privateDevice.xlink, diff); - } else { - mvLog(MVLOG_WARN, "[%p] device, no response for %d ms\n", privateDevice.xlink, diff); - } - } - - milliseconds dueIn(const time_point &current_time) const noexcept override { - if (lastPingTime == time_point::min()) - return milliseconds::zero(); - - // overdue - if (current_time - lastPingTime > std::chrono::milliseconds(privateDevice.wd_interval)) { + milliseconds dueIn(const time_point& current_time) const noexcept override { + if
(!m_firstCall) { return milliseconds::zero(); } - return duration_cast(lastPingTime + std::chrono::milliseconds(privateDevice.wd_interval) - current_time); + return m_originalPtr->dueIn(current_time); } - /** - * @brief means device is hanging - */ bool isTimeout() const noexcept override { - if (lastPongTime > lastPingTime) return false; - if (lastPingTime - lastPongTime > milliseconds(deviceHangTimeout)) { - // cleaning xlink connection - allowing abort all semaphores waiting in other threads - XLinkResetAll(); - return true; - } - return false; + return m_originalPtr->isTimeout(); } - /** - * @brief gets some opaque handle that clearly destinguesh one device previate_t from another - */ - void *getHandle() const noexcept override { - return privateDevice.xlink; + void* getHandle() const noexcept override { + return m_originalPtr->getHandle(); } private: - bool sendPingMessage() { - XLinkError_t rc = X_LINK_SUCCESS; - XLINK_RET_ERR_IF(pthread_mutex_lock(&privateDevice.dev_stream_m), false); - - deviceCommand_t config = {}; - config.type = DEVICE_WATCHDOG_PING; - - // xlink ping acknowledge interval shouldn't be more then expected ping interval - rc = XLinkWriteDataWithTimeout(privateDevice.device_mon_stream_id, (const uint8_t*)&config, sizeof(config), deviceHangTimeout); - - if(pthread_mutex_unlock(&privateDevice.dev_stream_m) != 0) { - mvLog(MVLOG_ERROR, "Failed to unlock privateDevice.dev_stream_m"); - } - - if (rc != X_LINK_SUCCESS) { - mvLog(MVLOG_ERROR, "Failed send ping message: %s", XLinkErrorToStr(rc)); - return false; - } - return true; - } + IDevice* m_originalPtr; + bool m_firstCall = false; }; -/** - * @brief when device just added into watchdog, it should not be due interval at all - */ -class NoDueOnFirstCall : public IDevice { - std::shared_ptr original; - bool bFirstCall = false; - public: - NoDueOnFirstCall(const std::shared_ptr & original) : original(original) {} - void setInterval(const std::chrono::milliseconds msInterval) noexcept override { 
- original->setInterval(msInterval); - } - void keepAlive(const time_point &current_time) noexcept override { - original->keepAlive(current_time); - bFirstCall = true; - } - std::chrono::milliseconds dueIn(const time_point &current_time) const noexcept override { - if (!bFirstCall) { - return milliseconds::zero(); - } - return original->dueIn(current_time); - } - bool isTimeout() const noexcept override { - return original->isTimeout(); - } - void *getHandle() const noexcept override { - return original->getHandle(); - } -}; - -class CustomUniqueLock { +class WatchdogImpl { public: - explicit CustomUniqueLock(pthread_mutex_t* mutex) - :m_mutex(mutex) { - if(m_mutex == nullptr) { - throw std::runtime_error("mutex should not be null"); - } + WatchdogImpl(); + ~WatchdogImpl(); - int rc = pthread_mutex_lock(m_mutex); - if (rc != 0) { - throw std::runtime_error(std::string("failed to lock mutex. rc: ") + strerror(rc)); - } - }; - - ~CustomUniqueLock() { - int rc = pthread_mutex_unlock(m_mutex); - if (rc != 0) { - mvLog(MVLOG_ERROR, "failed to unlock mutex.
rc: %s", strerror(rc)); - } - } + bool registerDevice(IDevice* device); + bool removeDevice(IDevice* device); - CustomUniqueLock(const CustomUniqueLock&) = delete; - CustomUniqueLock(const CustomUniqueLock&&) = delete; - CustomUniqueLock& operator=(const CustomUniqueLock&) = delete; - CustomUniqueLock& operator=(const CustomUniqueLock&&) = delete; + WatchdogImpl(const WatchdogImpl&) = delete; + WatchdogImpl(WatchdogImpl&&) = delete; + WatchdogImpl& operator = (const WatchdogImpl&) = delete; + WatchdogImpl& operator = (WatchdogImpl&&) = delete; private: - pthread_mutex_t* m_mutex = nullptr; -}; + void waitFor(const milliseconds sleepInterval); + void watchdogRoutine() noexcept; -static void * WD_OPAQUE_MAGIC = reinterpret_cast(0xdeadbeaf); - -struct wd_context_opaque { - void * magic = WD_OPAQUE_MAGIC; - IDevice * actual = nullptr; - bool destroyed = false; - void *handleCached = nullptr; -}; - -class WatchdogImpl { - using wd_context_as_tuple = std::tuple, bool*, void*>; +private: + using Devices = std::vector>; + using DevicesMap = std::unordered_map>; - using Devices = std::list; Devices watchedDevices; + DevicesMap removedDevices; std::atomic_bool threadRunning {false}; pthread_mutex_t routineLock; pthread_cond_t wakeUpPingThread; std::thread poolThread; +}; - WatchdogImpl(const WatchdogImpl&) = delete; - WatchdogImpl(WatchdogImpl&&) = delete; - WatchdogImpl& operator = (const WatchdogImpl&) = delete; - WatchdogImpl& operator = (WatchdogImpl&&) = delete; - - class AutoScope { - public: - explicit AutoScope(const std::function& func) : _func(func) {} - ~AutoScope() { _func(); } +//------------- Watchdog implementation ------------- - AutoScope(const AutoScope&) = delete; - AutoScope& operator=(const AutoScope&) = delete; - private: - std::function _func; - }; +WatchdogImpl::WatchdogImpl() { + int rc = pthread_mutex_init(&routineLock, NULL); + if (rc != 0) { + throw std::runtime_error("failed to initialize \"routineLock\" mutex. 
rc: " + std::to_string(rc)); + } -private: +#if !(defined(__APPLE__) || defined(_WIN32)) + pthread_condattr_t attr; + rc = pthread_condattr_init(&attr); + if (rc != 0) { + throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc)); + } - WatchdogImpl() { - int rc = pthread_mutex_init(&routineLock, NULL); - if (rc != 0) { - throw std::runtime_error("failed to initialize \"routineLock\" mutex. rc: " + std::to_string(rc)); - } + AutoScope attrDestroy([&attr]{ + if (pthread_condattr_destroy(&attr) != 0) + mvLog(MVLOG_ERROR, "Failed to destroy condition variable attribute."); + }); -#if !(defined(__APPLE__) || defined(_WIN32)) - pthread_condattr_t attr; - rc = pthread_condattr_init(&attr); - if (rc != 0) { - throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc)); - } - AutoScope attrDestroy([&attr]{ - if (pthread_condattr_destroy(&attr) != 0) - mvLog(MVLOG_ERROR, "Failed to destroy condition variable attribute."); - }); + rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); + if (rc != 0) { + throw std::runtime_error("failed to set condition variable clock. rc: " + std::to_string(rc)); + } - rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); - if (rc != 0) { - throw std::runtime_error("failed to set condition variable clock. rc: " + std::to_string(rc)); - } + rc = pthread_cond_init(&wakeUpPingThread, &attr); +#else + rc = pthread_cond_init(&wakeUpPingThread, NULL); #endif // !(defined(__APPLE__) || defined(_WIN32)) - rc = pthread_cond_init(&wakeUpPingThread, NULL); - if (rc != 0) { - throw std::runtime_error("failed to initialize \"wakeUpPingThread\" condition variable. rc: " + std::to_string(rc)); - } + if (rc != 0) { + throw std::runtime_error("failed to initialize \"wakeUpPingThread\" condition variable. 
rc: " + std::to_string(rc)); } +} -public: +WatchdogImpl::~WatchdogImpl() { + mvLog(MVLOG_INFO, "watchdog terminated\n"); + try + { + CustomUniqueLock lock {&routineLock}; + for (auto &item : watchedDevices) { + mvLog(MVLOG_WARN, "[%p] device, stop watching due to watchdog termination\n", item->getHandle()); + } + } catch (const std::exception & ex) { + mvLog(MVLOG_ERROR, "error %s", ex.what()); + } catch (...) { + mvLog(MVLOG_ERROR, "unknown error"); + } - static WatchdogImpl &instance() { - static WatchdogImpl watchdog; - return watchdog; + threadRunning = false; + int rc = pthread_cond_broadcast(&wakeUpPingThread); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); } + if (poolThread.joinable()) { + poolThread.join(); + } - ~WatchdogImpl() { - mvLog(MVLOG_INFO, "watchdog terminated\n"); - try - { - CustomUniqueLock lock {&routineLock}; - for (auto &item : watchedDevices) { - *std::get<1>(item) = true; - mvLog(MVLOG_WARN, "[%p] device, stop watching due to watchdog termination\n", std::get<2>(item)); - } - } catch (const std::exception & ex) { - mvLog(MVLOG_ERROR, "error %s", ex.what()); - } catch (...) { - mvLog(MVLOG_ERROR, "unknown error"); - } + rc = pthread_mutex_destroy(&routineLock); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to destroy the \"routineLock\". rc=%d", rc); + } - threadRunning = false; - int rc = pthread_cond_broadcast(&wakeUpPingThread); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); - } + rc = pthread_cond_destroy(&wakeUpPingThread); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to destroy the \"wakeUpPingThread\". rc=%d", rc); + } +} - rc = pthread_mutex_destroy(&routineLock); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to destroy the \"routineLock\". 
rc=%d", rc); - } +bool WatchdogImpl::registerDevice(IDevice* device) { + mvLog(MVLOG_INFO, "register device: %p\n", &device); - rc = pthread_cond_destroy(&wakeUpPingThread); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to destroy the \"wakeUpPingThread\". rc=%d", rc); - } + CustomUniqueLock lock {&routineLock}; + if (!threadRunning) { if (poolThread.joinable()) { poolThread.join(); } - } - -public: - void *register_device(std::shared_ptr device) { - CustomUniqueLock lock {&routineLock}; - std::unique_ptr ctx (new wd_context_opaque); - - // rare case of exact pointer address collision - if (ctx.get() == WD_OPAQUE_MAGIC) { - std::unique_ptr ctx2(new wd_context_opaque); - ctx.reset(ctx2.release()); - } - - if (!threadRunning) { - if (poolThread.joinable()) { - poolThread.join(); - } - threadRunning = true; + threadRunning = true; - poolThread = std::thread([this]() { - if (pthread_setname_np( + poolThread = std::thread([this]() { + if (pthread_setname_np( #ifndef __APPLE__ - pthread_self(), + pthread_self(), #endif - "WatchdogThread") != 0) { - perror("Setting name for watchdog thread failed"); - } - watchdog_routine(); - }); - } else { - // wake up thread - int rc = pthread_cond_broadcast(&wakeUpPingThread); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". 
rc=%d", rc); + "WatchdogThread") != 0) { + perror("Setting name for watchdog thread failed"); } - } - - ctx->handleCached = device->getHandle(); - watchedDevices.emplace_back(device, &ctx->destroyed, ctx->handleCached); + watchdogRoutine(); + }); + } - ctx->actual = std::get<0>(watchedDevices.back()).get(); + auto it = std::find_if(std::begin(watchedDevices), + std::end(watchedDevices), + [&device](const std::shared_ptr& item) { + return item->getHandle() == device->getHandle(); + }); - return ctx.release(); + bool found = it != std::end(watchedDevices); + if (!found) { + watchedDevices.emplace_back(std::make_shared(device)); } - void *register_device(devicePrivate_t *device) { - return register_device(std::make_shared(std::make_shared(device))); + int rc = pthread_cond_broadcast(&wakeUpPingThread); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); } - bool remove_device(void *opaque) { - mvLog(MVLOG_INFO, "remove_device : %p\n", opaque); - auto ptr = reinterpret_cast(opaque); - if (ptr == nullptr) { - return false; - } + return !found; +} - bool bFound = false; - { - CustomUniqueLock lock {&routineLock}; +bool WatchdogImpl::removeDevice(IDevice* device) { + mvLog(MVLOG_INFO, "remove device: %p\n", &device); - // thread already removed - if (ptr->destroyed) { - delete ptr; - return true; - } + CustomUniqueLock lock {&routineLock}; - auto idx = std::find_if(std::begin(watchedDevices), - std::end(watchedDevices), - [ptr](const wd_context_as_tuple &item) { - return std::get<0>(item)->getHandle() == ptr->actual->getHandle(); - }); - bFound = idx != std::end(watchedDevices); - if(bFound) { - watchedDevices.erase(idx); - delete ptr; - } - } + auto it = std::find_if(std::begin(watchedDevices), + std::end(watchedDevices), + [&device](const std::shared_ptr& item) { + return item->getHandle() == device->getHandle(); + }); - // wake up thread since we might select removed device as nex to be ping, and there is no 
more devices available - int rc = pthread_cond_broadcast(&wakeUpPingThread); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); - } + bool removed = it != std::end(watchedDevices); + if (removed) { + watchedDevices.erase(it); + } else if (removedDevices.count(device->getHandle())) { + removedDevices.erase(device->getHandle()); + removed = true; + } - return bFound; + // wake up thread since we might select removed device as nex to be ping, and there is no more devices available + int rc = pthread_cond_broadcast(&wakeUpPingThread); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); } - private: - /// @note: We are using here pthread_cond_timedwait as a replacement for condition_variable::wait_for, - /// as libstdc++ has bug not using monotonic clock. When GCC 10.x became minimum supported version, - /// that code could be removed. - void wait_for(const milliseconds sleepInterval) { - struct timespec timeToWait = {0, 0}; + return removed; +} + +void WatchdogImpl::waitFor(const milliseconds sleepInterval) { + struct timespec timeToWait = {0, 0}; - const auto sec = std::chrono::duration_cast(sleepInterval); + const auto sec = std::chrono::duration_cast(sleepInterval); #if (defined(__APPLE__) || defined(_WIN32)) - timeToWait.tv_sec = sec.count(); - timeToWait.tv_nsec = - std::chrono::duration_cast(sleepInterval).count() - - std::chrono::nanoseconds(sec).count(); + timeToWait.tv_sec = sec.count(); + timeToWait.tv_nsec = + std::chrono::duration_cast(sleepInterval).count() - + std::chrono::nanoseconds(sec).count(); #else - clock_gettime(CLOCK_MONOTONIC, &timeToWait); - const auto secondInNanoSeconds = 1000000000L; - const auto nsecSum = std::chrono::duration_cast(sleepInterval).count() - - std::chrono::nanoseconds(sec).count() + timeToWait.tv_nsec; - timeToWait.tv_sec += sec.count() + nsecSum / secondInNanoSeconds; - timeToWait.tv_nsec = nsecSum % 
secondInNanoSeconds; + clock_gettime(CLOCK_MONOTONIC, &timeToWait); + const auto secondInNanoSeconds = 1000000000L; + const auto nsecSum = std::chrono::duration_cast(sleepInterval).count() - + std::chrono::nanoseconds(sec).count() + timeToWait.tv_nsec; + timeToWait.tv_sec += sec.count() + nsecSum / secondInNanoSeconds; + timeToWait.tv_nsec = nsecSum % secondInNanoSeconds; #endif // (defined(__APPLE__) || defined(_WIN32)) #if defined(__APPLE__) - const auto rc = pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait); + const auto rc = pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait); #else - const auto rc = pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait); + const auto rc = pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait); #endif // defined(__APPLE__) - if (rc != 0 && rc != ETIMEDOUT) { - throw std::runtime_error("Failed to perform wait in a loop for " + std::to_string(sleepInterval.count()) + " ms. rc: " + std::to_string(rc)); - } + + if (rc != 0 && rc != ETIMEDOUT) { + throw std::runtime_error("Failed to perform wait in a loop for " + std::to_string(sleepInterval.count()) + " ms. 
rc: " + std::to_string(rc)); } +} - void watchdog_routine() noexcept { - try { - mvLog(MVLOG_INFO, "thread started\n"); - - milliseconds sleepInterval; - - CustomUniqueLock lock {&routineLock}; - - do { - for (auto deviceIt = watchedDevices.begin(); deviceIt != watchedDevices.end(); ) { - auto &device = std::get<0>(*deviceIt); - auto isReady = device->dueIn(steady_clock::now()).count() == 0; - if (isReady) { - auto now = high_resolution_clock::now(); - device->keepAlive(steady_clock::now()); - mvLog(MVLOG_DEBUG, "ping completed in %ld ms\n", duration_cast(high_resolution_clock ::now()-now).count()); - } - if (device->isTimeout()) { - mvLog(MVLOG_ERROR, "[%p] device, not respond, removing from watchdog\n", device->getHandle()); - // marking device as deleted, to prevent double resource free from wd_unregister_device - *std::get<1>(*deviceIt) = true; - deviceIt = watchedDevices.erase(deviceIt); - } - else { - ++deviceIt; - } +void WatchdogImpl::watchdogRoutine() noexcept { + try { + mvLog(MVLOG_INFO, "thread started\n"); + + milliseconds sleepInterval; + CustomUniqueLock lock{&routineLock}; + + do { + for (auto deviceIt = watchedDevices.begin(); deviceIt != watchedDevices.end();) { + auto &device = *deviceIt; + auto isReady = device->dueIn(steady_clock::now()).count() <= 0; + if (isReady) { + auto now = steady_clock::now(); + device->keepAlive(steady_clock::now()); + mvLog(MVLOG_DEBUG, "ping completed in %ld ms\n", + duration_cast(steady_clock::now() - now).count()); } - auto currentTime = steady_clock::now(); - auto minInterval = std::min_element(watchedDevices.begin(), - watchedDevices.end(), - [&currentTime] (const Devices::value_type & device1, const Devices::value_type & device2) { - return std::get<0>(device1)->dueIn(currentTime).count() - < std::get<0>(device2)->dueIn(currentTime).count(); - }); - // if for some reason we have empty devices list but watchdog is active - if (minInterval == watchedDevices.end()) { - mvLog(MVLOG_INFO, "no active devices to watch,
stopping Watchdog thread\n"); - threadRunning = false; - break; + if (device->isTimeout()) { + mvLog(MVLOG_ERROR, "[%p] device, not respond, removing from watchdog\n", device->getHandle()); + // marking device as deleted, to prevent double resource free from wd_unregister_device + removedDevices[device->getHandle()] = device; + deviceIt = watchedDevices.erase(deviceIt); + } else { + ++deviceIt; } - // TODO: no timer coalescing feature, to minimized thread wakes - sleepInterval = std::get<0>(*minInterval)->dueIn(currentTime); - if (sleepInterval.count() <= 0) - continue; - - mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count()); - wait_for(sleepInterval); - - mvLog(MVLOG_DEBUG, "waiting completed in %ld ms\n", - duration_cast(steady_clock::now() - currentTime).count()); - } while (threadRunning); - } catch (const std::exception & ex) { - mvLog(MVLOG_ERROR, "error %s", ex.what()); - } catch (...) { - mvLog(MVLOG_ERROR, "unknown error"); - } + } + auto currentTime = steady_clock::now(); + auto minInterval = std::min_element(watchedDevices.begin(), watchedDevices.end(), + [&currentTime](const Devices::value_type& device1, + const Devices::value_type& device2) { + return device1->dueIn(currentTime).count() < + device2->dueIn(currentTime).count(); + }); + // if for some reason we have empty devices list but watchdog is active + if (minInterval == watchedDevices.end()) { + mvLog(MVLOG_INFO, "no active devices to watch, stopping Watchdog thread\n"); + threadRunning = false; + break; + } + + sleepInterval = (*minInterval)->dueIn(currentTime); + if (sleepInterval.count() <= 0) { + continue; + } + + mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count()); + + waitFor(sleepInterval); - mvLog(MVLOG_INFO, "thread ended\n"); + mvLog(MVLOG_DEBUG, "waiting completed in %ld ms\n", + duration_cast(steady_clock::now() - currentTime).count()); + + } while (threadRunning); + } catch (const std::exception &ex) { + mvLog(MVLOG_ERROR, "error %s", ex.what()); + } 
catch (...) { + mvLog(MVLOG_ERROR, "unknown error"); } -}; + + mvLog(MVLOG_INFO, "thread ended\n"); +} } // namespace -WD_API wd_error_t watchdog_init_context(wd_context *ctx) { +struct _WatchdogHndl_t { + WatchdogImpl* m_watchdog; +}; + +wd_error_t watchdog_create(WatchdogHndl_t** out_watchdogHndl) { + if (out_watchdogHndl == nullptr) { + return WD_NOTINITIALIZED; + } + + *out_watchdogHndl = nullptr; + auto tmpWdHndl = + static_cast(malloc(sizeof(WatchdogHndl_t))); + if(tmpWdHndl == nullptr) { + return WD_FAIL; + } + try { - mvLogLevelSet(MVLOG_ERROR); - mvLogDefaultLevelSet(MVLOG_ERROR); - if (!ctx) { - return WD_NOTINITIALIZED; - } - // opaque pointer initialized - if (ctx->opaque == WD_OPAQUE_MAGIC) { - mvLog(MVLOG_INFO, "watchdog context (%p) already initialized \n", ctx); - } else { - ctx->opaque = WD_OPAQUE_MAGIC; - } + tmpWdHndl->m_watchdog = new WatchdogImpl(); + *out_watchdogHndl = tmpWdHndl; return WD_ERRNO; - } catch (...) { - mvLog(MVLOG_ERROR, "failed initialize watchdog context: %p\n", ctx); + } catch (const std::exception& ex) { + mvLog(MVLOG_ERROR, "error %s", ex.what()); + } catch (...) 
{ + mvLog(MVLOG_ERROR, "unknown error"); } + + free(tmpWdHndl); return WD_FAIL; } -WD_API wd_error_t watchdog_register_device(wd_context * ctx, devicePrivate_t *device) { - try { - if (!ctx) { - mvLog(MVLOG_ERROR, "watchdog context is null\n"); - return WD_NOTINITIALIZED; - } - // opaque pointer initialized - if (ctx->opaque == nullptr) { - mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx); - return WD_NOTINITIALIZED; - } - if (device && device->wd_interval <= 0) { - mvLog(MVLOG_ERROR, "watchdog interval should be > 0, but was (%d)\n", device->wd_interval); - return WD_NOTINITIALIZED; - } - // opaque pointer initialized - if (ctx->opaque != WD_OPAQUE_MAGIC) { - auto watchee = reinterpret_cast(ctx->opaque); - // NOTE: magic field used to pass preallocated watchee - since this function only used by plugin, this is not a backdoor - if (watchee->magic == WD_OPAQUE_MAGIC) { - // actually this can represent already registered context, so need to check - // since we are adding NoDue wrapper, lets check for it - if (nullptr != dynamic_cast(watchee->actual)) { - mvLog(MVLOG_ERROR, "watchdog context (%p) already registered within watchdog\n", ctx); - return WD_DUPLICATE; - } +void watchdog_destroy(WatchdogHndl_t* watchdogHndl) { + if (watchdogHndl == nullptr) { + return; + } - // transferring interval from context - if (device) { - watchee->actual->setInterval(milliseconds(device->wd_interval)); - } - ctx->opaque = WatchdogImpl::instance().register_device( - shared_ptr(new NoDueOnFirstCall(shared_ptr(watchee->actual, [](IDevice*){})))); + if (watchdogHndl->m_watchdog != nullptr) { + delete(watchdogHndl->m_watchdog); + } - if (ctx->opaque == nullptr) { - mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx); - } else { - return WD_ERRNO; - } - } - mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx); - return WD_NOTINITIALIZED; - } + free(watchdogHndl); +} + +wd_error_t watchdog_register_device(WatchdogHndl_t* watchdogHndl, 
WdDeviceHndl_t* deviceHandle) { + if (watchdogHndl == nullptr) { + mvLog(MVLOG_ERROR, "watchdog handle is null\n"); + return WD_NOTINITIALIZED; + } + + if (deviceHandle == nullptr) { + mvLog(MVLOG_ERROR, "watchdog device handle is null\n"); + return WD_NOTINITIALIZED; + } - if (device && device->wd_interval > 0) { - ctx->opaque = WatchdogImpl::instance().register_device(device); - } else { - ctx->opaque = nullptr; + if (deviceHandle->m_device == nullptr) { + mvLog(MVLOG_ERROR, "watchdog device not initialized. handle=%p\n", deviceHandle); + return WD_NOTINITIALIZED; + } + + try { + WatchdogImpl* watchdog = watchdogHndl->m_watchdog; + auto device = reinterpret_cast(deviceHandle->m_device); + if (!watchdog->registerDevice(device)) { + mvLog(MVLOG_WARN, "cannot register device\n"); + return WD_FAIL; } return WD_ERRNO; } catch (const std::exception & ex) { mvLog(MVLOG_ERROR, "failed to register device: %s\n", ex.what()); } catch (...) { - mvLog(MVLOG_ERROR, "failed to register device context (%p)\n", ctx); + mvLog(MVLOG_ERROR, "failed to register device (%p)\n", deviceHandle); } + return WD_FAIL; } -WD_API wd_error_t watchdog_unregister_device(wd_context *ctx) { - try { - if (ctx == nullptr || ctx->opaque == nullptr) { - return WD_NOTINITIALIZED; - } else { - if (ctx->opaque != WD_OPAQUE_MAGIC) { - auto watchee = reinterpret_cast(ctx->opaque); - // NOTE: magic field used to pass preallocated watchee - since this function only used by plugin, this is not a backdoor - if (watchee->magic == WD_OPAQUE_MAGIC) { - if (!WatchdogImpl::instance().remove_device(ctx->opaque)) { - mvLog(MVLOG_WARN, "cannot remove device\n"); - return WD_FAIL; - } - } - } - } +wd_error_t watchdog_unregister_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle) { + if (watchdogHndl == nullptr) { + mvLog(MVLOG_ERROR, "watchdog handle is null\n"); + return WD_NOTINITIALIZED; + } - if (ctx != nullptr) { - // opaque pointer deleted - ctx->opaque = nullptr; - } + if (deviceHandle == nullptr) 
{ + mvLog(MVLOG_ERROR, "watchdog device handle is null\n"); + return WD_NOTINITIALIZED; + } + + if (deviceHandle->m_device == nullptr) { + mvLog(MVLOG_ERROR, "watchdog device not initialized. handle=%p\n", deviceHandle); + return WD_NOTINITIALIZED; + } + try { + WatchdogImpl* watchdog = watchdogHndl->m_watchdog; + auto device = reinterpret_cast(deviceHandle->m_device); + if (!watchdog->removeDevice(device)) { + mvLog(MVLOG_WARN, "cannot remove device\n"); + return WD_FAIL; + } return WD_ERRNO; } catch (const std::exception & ex) { - mvLog(MVLOG_WARN, "error %s", ex.what()); + mvLog(MVLOG_ERROR, "error %s", ex.what()); } catch (...) { - mvLog(MVLOG_WARN, "unknown error"); + mvLog(MVLOG_ERROR, "unknown error"); } return WD_FAIL; diff --git a/inference-engine/thirdparty/movidius/mvnc/src/watchdog/xlink_device.cpp b/inference-engine/thirdparty/movidius/mvnc/src/watchdog/xlink_device.cpp new file mode 100644 index 00000000000000..56623257d9cbb0 --- /dev/null +++ b/inference-engine/thirdparty/movidius/mvnc/src/watchdog/xlink_device.cpp @@ -0,0 +1,173 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "xlink_device.h" +#include "watchdog.h" +#include "watchdogPrivate.hpp" + +#include "XLink.h" +#include "XLinkPrivateDefines.h" +#include "XLinkErrorUtils.h" + +#include + +#include + +namespace { + +using namespace std; +using namespace chrono; +using namespace Watchdog; + +class XLinkDevice : public IDevice { +public: + explicit XLinkDevice(devicePrivate_t* pDevice); + + void keepAlive(const time_point& current_time) noexcept override; + + milliseconds dueIn(const time_point& current_time) const noexcept override; + bool isTimeout() const noexcept override; + + /** + * @brief gets some opaque handle that clearly distinguish one device private_t from another + */ + void* getHandle() const noexcept override; + + ~XLinkDevice() = default; + +private: + bool sendPingMessage(); + +private: + const int kDeviceHangTimeout = 
12000; + + _devicePrivate_t m_devicePrivate; + + time_point m_lastPongTime = time_point::min(); + time_point m_lastPingTime = time_point::min(); +}; + +//----------------- XLinkDevice implementation --------------------- + +XLinkDevice::XLinkDevice(devicePrivate_t* pDevice) + : m_devicePrivate(*pDevice) { + if (m_devicePrivate.wd_interval <= 0) { + throw runtime_error( + "watchdog interval should be > 0, but was " + std::to_string(m_devicePrivate.wd_interval)); + } + m_devicePrivate.wd_interval = std::max(m_devicePrivate.wd_interval, WATCHDOG_MAX_PING_INTERVAL_MS); +} + +void XLinkDevice::keepAlive(const time_point &current_time) noexcept { + bool bPong = sendPingMessage(); + // we consider that as first pong time even if it wasn't happen as beginning of boot + if (m_lastPongTime == time_point::min()) { + m_lastPongTime = current_time; + } + + m_lastPingTime = current_time; + + int diff = duration_cast(current_time - m_lastPongTime).count(); + + if (bPong) { + m_lastPongTime = current_time; + mvLog(MVLOG_INFO, "[%p] device, ping succeed after %d ms\n", m_devicePrivate.xlink, diff); + } else { + mvLog(MVLOG_WARN, "[%p] device, no response for %d ms\n", m_devicePrivate.xlink, diff); + } +} + +milliseconds XLinkDevice::dueIn(const time_point& current_time) const noexcept { + if (m_lastPingTime == time_point::min()) { + return milliseconds::zero(); + } + + // overdue + if (current_time - m_lastPingTime > std::chrono::milliseconds(m_devicePrivate.wd_interval)) { + return milliseconds::zero(); + } + + return duration_cast(m_lastPingTime + + std::chrono::milliseconds(m_devicePrivate.wd_interval) - current_time); +} + +bool XLinkDevice::isTimeout() const noexcept { + if (m_lastPongTime > m_lastPingTime) { + return false; + } + + if (m_lastPingTime - m_lastPongTime > milliseconds(kDeviceHangTimeout)) { + // cleaning xlink connection - allowing abort all semaphores waiting in other threads + XLinkResetAll(); + return true; + } + + return false; +} + +void* XLinkDevice::getHandle() 
const noexcept { + return m_devicePrivate.xlink; +} + +bool XLinkDevice::sendPingMessage() { + XLINK_RET_ERR_IF(pthread_mutex_lock(&m_devicePrivate.dev_stream_m), false); + + deviceCommand_t config = {}; + config.type = DEVICE_WATCHDOG_PING; + + // xlink ping acknowledge interval shouldn't be more then expected ping interval + XLinkError_t rc = XLinkWriteDataWithTimeout(m_devicePrivate.device_mon_stream_id, + (const uint8_t*)&config, sizeof(config), kDeviceHangTimeout); + + if(pthread_mutex_unlock(&m_devicePrivate.dev_stream_m) != 0) { + mvLog(MVLOG_ERROR, "Failed to unlock m_devicePrivate.dev_stream_m"); + } + + if (rc != X_LINK_SUCCESS) { + mvLog(MVLOG_ERROR, "Failed send ping message: %s", XLinkErrorToStr(rc)); + return false; + } + + return true; +} + +} // namespace + +wd_error_t xlink_device_create(WdDeviceHndl_t** out_deviceHandle, devicePrivate_t* pDevice) { + if (out_deviceHandle == nullptr || pDevice == nullptr) { + return WD_NOTINITIALIZED; + } + + *out_deviceHandle = nullptr; + auto tmpWdDeviceHndl = + static_cast(malloc(sizeof(WdDeviceHndl_t))); + if(tmpWdDeviceHndl == nullptr) { + return WD_FAIL; + } + + try { + tmpWdDeviceHndl->m_device = new XLinkDevice(pDevice); + *out_deviceHandle = tmpWdDeviceHndl; + return WD_ERRNO; + } catch (const std::exception& ex) { + mvLog(MVLOG_ERROR, "error %s", ex.what()); + } catch (...) 
{ + mvLog(MVLOG_ERROR, "unknown error"); + } + + free(tmpWdDeviceHndl); + return WD_FAIL; +} + +void xlink_device_destroy(WdDeviceHndl_t* deviceHandle) { + if (deviceHandle == nullptr) { + return; + } + + if (deviceHandle->m_device != nullptr) { + delete(reinterpret_cast(deviceHandle->m_device)); + } + + free(deviceHandle); +} diff --git a/model-optimizer/extensions/front/tf/activation_ext.py b/model-optimizer/extensions/front/tf/activation_ext.py index 0ac443605862fc..52ecb01ed93aa1 100644 --- a/model-optimizer/extensions/front/tf/activation_ext.py +++ b/model-optimizer/extensions/front/tf/activation_ext.py @@ -14,7 +14,7 @@ limitations under the License. """ from extensions.ops.activation_ops import Abs, Elu, Erf, Exp, ReLU, LeakyReLU, LogicalNot, ReLU6, Sigmoid, \ - Sin, Sinh, Cos, Cosh, Tan, Tanh + Sin, Sinh, Cos, Cosh, Tan, Tanh, Ceiling from mo.front.extractor import FrontExtractorOp