diff --git a/.hailort.jpg b/.hailort.jpg
index 84d29889..cd625648 100644
Binary files a/.hailort.jpg and b/.hailort.jpg differ
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 55d54df7..7d02330b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,8 +1,12 @@
 cmake_minimum_required(VERSION 3.0.0)
 
 find_program(CCACHE_PROGRAM ccache)
+find_program(CLCACHE_PROGRAM clcache)
+
 if(CCACHE_PROGRAM)
     set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
+elseif(CLCACHE_PROGRAM)
+    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CLCACHE_PROGRAM}")
 endif()
 
 project(HailoRT)
diff --git a/common/include/context_switch_defs.h b/common/include/context_switch_defs.h
index d7fe8f23..d3ce3664 100644
--- a/common/include/context_switch_defs.h
+++ b/common/include/context_switch_defs.h
@@ -56,6 +56,13 @@ extern "C" {
 
 #define CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE (4)
 
+// TODO HRT-12512: Update variable when/if DDR has its own CMA region
+#define CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS (0x80000000)
+#define CONTEXT_SWITCH_DEFS__END_M4_MAPPED_DDR_ADDRESS (0x90000000)
+#define CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS_AFTER_LUT (0x50000000)
+#define CONTEXT_SWITCH_DEFS__DDR_ADDRESS_MASK (0x0FFFFFFF)
+#define CONTEXT_SWITCH_DEFS__INVALID_DDR_CONTEXTS_BUFFER_ADDRESS (0)
+
 #pragma pack(push, 1)
 
 typedef struct {
@@ -207,7 +214,7 @@ typedef struct {
     uint32_t kernel_done_count;
 } CONTEXT_SWITCH_DEFS__enable_lcu_action_non_default_data_t;
 
-/* Default action - kernel_done_address and kernel_done_count has default values */
+/* Default action - kernel_done_address, kernel_done_count have default values */
 typedef struct {
     uint8_t packed_lcu_id;
     uint8_t network_index;
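A note on the new M4 DDR window macros above: the mask keeps the low 28 bits of an address, which matches the 256 MB window between the start and end mapped addresses. A minimal sketch of the translation this implies follows; the helper name and the exact rebasing semantics are assumptions, not part of the diff:

    /* Hypothetical helper, assuming the M4 sees DDR through a fixed 256 MB window. */
    static inline uint32_t m4_mapped_ddr_address(uint32_t ddr_address)
    {
        /* Keep the low 28 bits and rebase them onto the start of the M4 window. */
        return CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS |
            (ddr_address & CONTEXT_SWITCH_DEFS__DDR_ADDRESS_MASK);
    }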
diff --git a/common/include/control_protocol.h b/common/include/control_protocol.h
index 9e022bdc..60ea8e31 100644
--- a/common/include/control_protocol.h
+++ b/common/include/control_protocol.h
@@ -81,6 +81,7 @@ extern "C" {
 /* Value to represent an operation should be performed on all streams. */
 #define CONTROL_PROTOCOL__ALL_DATAFLOW_MANAGERS (0xFF)
+#define CONTROL_PROTOCOL__MAX_CONTEXT_SIZE (3072)
 
 #define CONTROL_PROTOCOL__OPCODES_VARIABLES \
     CONTROL_PROTOCOL__OPCODE_X(HAILO_CONTROL_OPCODE_IDENTIFY, true, CPU_ID_APP_CPU)\
@@ -868,15 +869,18 @@ typedef struct {
 
 typedef struct {
     bool preliminary_run_asap;
+    bool batch_register_config;
+    bool can_fast_batch_switch;
 } CONTROL_PROTOCOL__INFER_FEATURE_LIST_t;
 
 typedef struct {
-    uint8_t dynamic_contexts_count;
+    uint16_t dynamic_contexts_count;
     CONTROL_PROTOCOL__INFER_FEATURE_LIST_t infer_features;
     CONTROL_PROTOCOL__VALIDATION_FEATURE_LIST_t validation_features;
     uint8_t networks_count;
     uint16_t csm_buffer_size;
     uint16_t batch_size[CONTROL_PROTOCOL__MAX_NETWORKS_PER_NETWORK_GROUP];
+    uint32_t external_action_list_address;
     uint32_t boundary_channels_bitmap[CONTROL_PROTOCOL__MAX_VDMA_ENGINES_COUNT];
 } CONTROL_PROTOCOL__application_header_t;
 
@@ -954,10 +958,10 @@ typedef struct {
 #pragma warning(disable: 4200)
 #endif
 typedef struct {
-    uint32_t is_first_control_per_context_length;
-    uint8_t is_first_control_per_context;
-    uint32_t is_last_control_per_context_length;
-    uint8_t is_last_control_per_context;
+    uint32_t is_first_chunk_per_context_length;
+    uint8_t is_first_chunk_per_context;
+    uint32_t is_last_chunk_per_context_length;
+    uint8_t is_last_chunk_per_context;
     uint32_t context_type_length;
     uint8_t context_type; // CONTROL_PROTOCOL__context_switch_context_type_t
     uint32_t context_network_data_length;
@@ -988,7 +992,7 @@ typedef struct {
     uint32_t context_type_length;
     uint8_t context_type; // CONTROL_PROTOCOL__context_switch_context_type_t
     uint32_t context_index_length;
-    uint8_t context_index;
+    uint16_t context_index;
     uint32_t action_list_offset_length;
     uint16_t action_list_offset;
 } CONTROL_PROTOCOL__download_context_action_list_request_t;
@@ -1160,7 +1164,7 @@ typedef struct {
     bool break_at_any_batch_index;
     uint16_t batch_index;
     bool break_at_any_context_index;
-    uint8_t context_index;
+    uint16_t context_index;
     bool break_at_any_action_index;
     uint16_t action_index;
 } CONTROL_PROTOCOL__context_switch_breakpoint_data_t;
@@ -1470,15 +1474,21 @@ typedef enum {
     CONTROL_PROTOCOL__CONTEXT_SWITCH_INDEX_COUNT,
 } CONTROL_PROTOCOL__context_switch_context_index_t;
 
-#define CONTROL_PROTOCOL__MAX_CONTEXTS_PER_NETWORK_GROUP (64)
+#define CONTROL_PROTOCOL__MAX_CONTEXTS_PER_NETWORK_GROUP (1024)
 
+// This struct is used for both ControlActionList and DDRActionList (to keep the FW flow as similar as possible).
+// The context_network_data array will never hold more data than CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE.
+// In the ControlActionList case this is verified when sending and receiving the control. We make the array larger here so it can
+// hold DDRActionList contexts without needing to copy or do more processing in FW.
+// In both cases this struct holds a chunk of the context - in ControlActionList it will be as much of the context as a
+// single control message can carry, and in DDRActionList it will be the whole context.
 typedef struct {
-    bool is_first_control_per_context;
-    bool is_last_control_per_context;
+    bool is_first_chunk_per_context;
+    bool is_last_chunk_per_context;
     uint8_t context_type; // CONTROL_PROTOCOL__context_switch_context_type_t
     uint32_t context_network_data_length;
-    uint8_t context_network_data[CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE];
-} CONTROL_PROTOCOL__context_switch_context_info_single_control_t;
+    uint8_t context_network_data[CONTROL_PROTOCOL__MAX_CONTEXT_SIZE];
+} CONTROL_PROTOCOL__context_switch_context_info_chunk_t;
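To make the chunking contract above concrete, here is a minimal sketch (not part of the diff) of how a sender could split one context into chunks. max_chunk_size would be CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE for a ControlActionList and up to CONTROL_PROTOCOL__MAX_CONTEXT_SIZE for a DDRActionList; the transport is passed in as a callback since it is not defined here:

    #include <string.h> /* memcpy */

    static void send_context_in_chunks(const uint8_t *context_data, uint32_t total_size, uint32_t max_chunk_size,
        void (*send_chunk)(const CONTROL_PROTOCOL__context_switch_context_info_chunk_t *))
    {
        uint32_t offset = 0;
        while (offset < total_size) {
            CONTROL_PROTOCOL__context_switch_context_info_chunk_t chunk = {0};
            uint32_t chunk_size = total_size - offset;
            if (chunk_size > max_chunk_size) {
                chunk_size = max_chunk_size;
            }
            /* The flags mark the chunk's position within the context */
            chunk.is_first_chunk_per_context = (0 == offset);
            chunk.is_last_chunk_per_context = ((offset + chunk_size) == total_size);
            chunk.context_network_data_length = chunk_size;
            memcpy(chunk.context_network_data, context_data + offset, chunk_size);
            send_chunk(&chunk);
            offset += chunk_size;
        }
    }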
CASSERT(sizeof(CONTROL_PROTOCOL__context_switch_context_index_t)<=UINT8_MAX, control_protocol_h);
 CASSERT(sizeof(CONTROL_PROTOCOL__context_switch_context_type_t)<=UINT8_MAX, control_protocol_h);
diff --git a/common/include/d2h_events.h b/common/include/d2h_events.h
index 1e402d6c..5d26cd5e 100644
--- a/common/include/d2h_events.h
+++ b/common/include/d2h_events.h
@@ -123,14 +123,14 @@ typedef struct {
 
 #define D2H_EVENT_HEALTH_MONITOR_CPU_ECC_EVENT_PARAMETER_COUNT (1)
 
-/* D2H_EVENT_context_switch_breakpoint_reached_event_massage_t should be the same as
+/* D2H_EVENT_context_switch_breakpoint_reached_event_message_t should be the same as
  * CONTROL_PROTOCOL__context_switch_breakpoint_data_t and hailo_context_switch_breakpoint_reached_notification_message_t */
 typedef struct {
     uint8_t application_index;
     uint16_t batch_index;
-    uint8_t context_index;
+    uint16_t context_index;
     uint16_t action_index;
-} D2H_EVENT_context_switch_breakpoint_reached_event_massage_t;
+} D2H_EVENT_context_switch_breakpoint_reached_event_message_t;
 
 #define D2H_EVENT_CONTEXT_SWITCH_BREAKPOINT_REACHED_EVENT_PARAMETER_COUNT (4)
 
@@ -151,7 +151,7 @@ typedef struct {
     uint32_t exit_status;
     uint8_t application_index;
     uint16_t batch_index;
-    uint8_t context_index;
+    uint16_t context_index;
     uint16_t action_index;
 } D2H_EVENT_context_switch_run_time_error_event_message_t;
 
@@ -166,7 +166,7 @@ typedef union {
     D2H_EVENT_health_monitor_overcurrent_alert_event_message_t health_monitor_overcurrent_alert_event;
     D2H_EVENT_health_monitor_lcu_ecc_error_event_message_t health_monitor_lcu_ecc_error_event;
     D2H_EVENT_health_monitor_cpu_ecc_event_message_t health_monitor_cpu_ecc_event;
-    D2H_EVENT_context_switch_breakpoint_reached_event_massage_t context_switch_breakpoint_reached_event;
+    D2H_EVENT_context_switch_breakpoint_reached_event_message_t context_switch_breakpoint_reached_event;
     D2H_EVENT_health_monitor_clock_changed_event_message_t health_monitor_clock_changed_event;
     D2H_EVENT_hw_infer_mamager_infer_done_message_t hw_infer_manager_infer_done_event;
     D2H_EVENT_context_switch_run_time_error_event_message_t context_switch_run_time_error_event;
diff --git a/common/include/firmware_status.h b/common/include/firmware_status.h
index b33f5a1b..1aa6bf5d 100644
--- a/common/include/firmware_status.h
+++ b/common/include/firmware_status.h
@@ -527,6 +527,7 @@ Updating rules:
     FIRMWARE_STATUS__X(QSPI_STATUS_MISALIGNED_ADDRESS)\
     FIRMWARE_STATUS__X(QSPI_STATUS_BLOCK_ERASE_FAILED)\
     FIRMWARE_STATUS__X(QSPI_STATUS_CLEAR_AHB_REMAP_FAILED)\
+    FIRMWARE_STATUS__X(QSPI_STATUS_NOT_SUPPORTED)\
     \
     FIRMWARE_MODULE__X(FIRMWARE_MODULE__PCIE_SERVICE)\
     FIRMWARE_STATUS__X(PCIE_SERVICE_STATUS_INVALID_PARAMETERS)\
@@ -763,6 +764,7 @@ Updating rules:
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_WRITE_DATA_BY_TYPE_ACTION_INVALID_TYPE)\ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_WRITE_DATA_BY_TYPE_ACTION_INVALID_MEMORY_SPACE)\ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_REACHED_TIMEOUT_WHILE_WAITING_FOR_BATCH_SWITCH_CONTEXT_TO_END)\ + FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_EXTERNAL_ACTION_LIST_ADDRESS)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__D2H_EVENT_MANAGER)\ FIRMWARE_STATUS__X(HAILO_D2H_EVENT_MANAGER_STATUS_MESSAGE_HIGH_PRIORITY_QUEUE_CREATE_FAILED)\ @@ -1080,6 +1082,7 @@ Updating rules: FIRMWARE_STATUS__X(BURST_CREDITS_TASK_STATUS_FAILED_TO_FIND_STREAM_INDEX)\ FIRMWARE_STATUS__X(BURST_CREDITS_TASK_STATUS_TASK_NO_CONFIGURED_ACTIONS)\ FIRMWARE_STATUS__X(BURST_CREDITS_TASK_STATUS_TASK_EXPECTED_HIGHER_BATCH)\ + FIRMWARE_STATUS__X(BURST_CREDITS_TASK_STATUS_TASK_REACHED_TIMEOUT_WAITING_FOR_DEACTIVATION)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__TASK_SYNC_EVENTS)\ FIRMWARE_STATUS__X(TASK_SYNC_EVENTS_STATUS_START_TASK_WHILE_IT_IS_RUNNING)\ @@ -1111,13 +1114,19 @@ Updating rules: FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_RECEIVED_UNEXPECTED_INTERRUPT)\ FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_NETWORK_INDEX)\ FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_KERNEL_DONE_COUNT)\ + FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_EXTENSION)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__HW_INFER_MANAGER)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_NETWORK_GROUP_NOT_CONFIGURED_BEFORE_INFER_START)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_NETWORK_GROUP_ALREADY_ACTIVATED)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_STATE_MACHINE_NOT_IN_RESET_STATE_BEFORE_DEACTIVATE)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_INVALID_STATE)\ - + \ + FIRMWARE_MODULE__X(FIRMWARE_MODULE__INFINITE_CONTEXT_LOADER)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_EVENT_BITS_NOT_CLEARED_BEFORE_COPY_CALL)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_TIMEOUT_OCCURED_WAITING_FOR_COPY)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NOT_SUPPORTED)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NOT_MODULE_NOT_INITIALIZED)\ typedef enum { #define FIRMWARE_MODULE__X(module) module, diff --git a/common/include/utils.h b/common/include/utils.h index 860d1fac..c2d50a7d 100644 --- a/common/include/utils.h +++ b/common/include/utils.h @@ -54,6 +54,10 @@ #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #endif +#ifndef DIV_ROUND_DOWN +#define DIV_ROUND_DOWN(n,d) ((n) / (d)) +#endif + #ifndef ROUND_UNSIGNED_FLOAT #define ROUND_UNSIGNED_FLOAT(n) ((n - (uint32_t)(n)) > 0.5) ? (uint32_t)(n + 1) : (uint32_t)(n) #endif diff --git a/hailort/.gitignore b/hailort/.gitignore index e544d4c6..80fa472a 100644 --- a/hailort/.gitignore +++ b/hailort/.gitignore @@ -1,3 +1,4 @@ +build/ +dist/ /external/ cmake/external/*/ -prepare_externals/build/ diff --git a/hailort/CMakeLists.txt b/hailort/CMakeLists.txt index 307878f8..32e115bf 100644 --- a/hailort/CMakeLists.txt +++ b/hailort/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) option(HAILO_BUILD_PYBIND "Build Python binding" OFF) option(HAILO_BUILD_EMULATOR "Build hailort for emulator" OFF) option(HAILO_BUILD_UT "Build Unit Tests" OFF) +option(HAILO_BUILD_DMABUF_TESTS "Build DMA buffer tests. 
Relevant only if HAILO_BUILD_UT is ON" OFF) option(HAILO_BUILD_HW_DEBUG_TOOL "Build hw debug tool" OFF) option(HAILO_BUILD_GSTREAMER "Compile gstreamer plugins" OFF) option(HAILO_BUILD_EXAMPLES "Build examples" OFF) @@ -30,8 +31,8 @@ endif() # Set firmware version add_definitions( -DFIRMWARE_VERSION_MAJOR=4 ) -add_definitions( -DFIRMWARE_VERSION_MINOR=16 ) -add_definitions( -DFIRMWARE_VERSION_REVISION=2 ) +add_definitions( -DFIRMWARE_VERSION_MINOR=17 ) +add_definitions( -DFIRMWARE_VERSION_REVISION=0 ) if(HAILO_BUILD_SERVICE) add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS ) endif() diff --git a/hailort/LICENSE-3RD-PARTY.md b/hailort/LICENSE-3RD-PARTY.md index 4868b784..473d5a0d 100644 --- a/hailort/LICENSE-3RD-PARTY.md +++ b/hailort/LICENSE-3RD-PARTY.md @@ -1,16 +1,17 @@ -| Package | Copyright (c) | License | Version | Notes | References | -|:---------------------------------|:----------------------------------|:-------------------|:---------------|:----------------------------------------------|:------------------------------------------------------------------------------| -| CLI11 | University of Cincinnati | 3-Clause BSD | 2.2.0 | Fork | https://github.com/hailo-ai/CLI11 | -| Catch2 | Catch2 Authors | BSL-1.0 | 2.13.7 | Cloned entire package | https://github.com/catchorg/Catch2 | -| protobuf | Google Inc. | BSD | 21.12 | Cloned entire package | https://github.com/protocolbuffers/protobuf | -| pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 | -| spdlog | Gabi Melman | MIT | 1.6.1 | Cloned entire package | https://github.com/gabime/spdlog | -| folly | Facebook, Inc. and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly | -| nlohmann_json_cmake_fetchcontent | ArthurSonzogni | MIT License | v3.9.1 | Cloned entire package | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent | -| readerwriterqueue | Cameron Desrochers | Simplified BSD | 1.0.3 | Cloned entire package | https://github.com/cameron314/readerwriterqueue | -| DotWriter | John Vilk | MIT License | master | Fork | https://github.com/hailo-ai/DotWriter | -| benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git | -| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 | -| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git | -| grpc | Google Inc. | Apache License 2.0 | 1.46.3 | Cloned entire package | https://github.com/grpc/grpc | -| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb | \ No newline at end of file +| Package | Copyright (c) | License | Version | Notes | References | +|:---------------------------------|:----------------------------------|:---------------------------|:---------------|:----------------------------------------------|:------------------------------------------------------------------------------| +| CLI11 | University of Cincinnati | 3-Clause BSD | 2.2.0 | Fork | https://github.com/hailo-ai/CLI11 | +| Catch2 | Catch2 Authors | BSL-1.0 | 2.13.7 | Cloned entire package | https://github.com/catchorg/Catch2 | +| protobuf | Google Inc. 
| BSD | 21.12 | Cloned entire package | https://github.com/protocolbuffers/protobuf |
+| pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 |
+| spdlog | Gabi Melman | MIT | 1.6.1 | Cloned entire package | https://github.com/gabime/spdlog |
+| folly | Facebook, Inc. and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly |
+| nlohmann_json_cmake_fetchcontent | ArthurSonzogni | MIT License | v3.9.1 | Cloned entire package | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent |
+| readerwriterqueue | Cameron Desrochers | Simplified BSD | 1.0.3 | Cloned entire package | https://github.com/cameron314/readerwriterqueue |
+| DotWriter | John Vilk | MIT License | master | Fork | https://github.com/hailo-ai/DotWriter |
+| benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git |
+| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 |
+| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git |
+| grpc | Google Inc. | Apache License 2.0 | 1.46.3 | Cloned entire package | https://github.com/grpc/grpc |
+| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb |
+| eigen | | Mozilla Public License 2.0 | 3.4.0 | Cloned entire package | https://gitlab.com/libeigen/eigen |
\ No newline at end of file
diff --git a/hailort/cmake/external/eigen.cmake b/hailort/cmake/external/eigen.cmake
new file mode 100644
index 00000000..1bb1f66f
--- /dev/null
+++ b/hailort/cmake/external/eigen.cmake
@@ -0,0 +1,28 @@
+cmake_minimum_required(VERSION 3.11.0)
+
+include(FetchContent)
+
+FetchContent_Declare(
+    eigen
+    GIT_REPOSITORY https://gitlab.com/libeigen/eigen
+    GIT_TAG 3147391d946bb4b6c68edd901f2add6ac1f31f8c # Version 3.4.0
+    GIT_SHALLOW TRUE
+    SOURCE_DIR ${HAILO_EXTERNAL_DIR}/eigen-src
+    SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/eigen-subbuild
+)
+
+
+# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
+FetchContent_GetProperties(eigen)
+if(NOT eigen_POPULATED)
+    FetchContent_Populate(eigen)
+    option(EIGEN_BUILD_DOC OFF)
+    option(BUILD_TESTING OFF)
+    option(EIGEN_LEAVE_TEST_IN_ALL_TARGET OFF)
+    option(EIGEN_BUILD_PKGCONFIG OFF)
+    option(CMAKE_Fortran_COMPILER OFF)
+
+    if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
+        add_subdirectory(${eigen_SOURCE_DIR} ${eigen_BINARY_DIR} EXCLUDE_FROM_ALL)
+    endif()
+endif()
\ No newline at end of file
diff --git a/hailort/common/barrier.cpp b/hailort/common/barrier.cpp
index 4342170f..db3c1209 100644
--- a/hailort/common/barrier.cpp
+++ b/hailort/common/barrier.cpp
@@ -36,6 +36,9 @@ void Barrier::arrive_and_wait()
 void Barrier::terminate()
 {
     m_is_activated.store(false);
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+    }
     m_cv.notify_all();
 }
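The empty lock scope added to Barrier::terminate() closes a classic lost-wakeup window. A sketch of the interleaving it prevents, assuming arrive_and_wait() checks m_is_activated and then blocks on m_cv under m_mutex:

    // Without the lock/unlock pair:
    //   waiter:     sees m_is_activated == true, is about to call m_cv.wait(lock)
    //   terminate:  m_is_activated.store(false); m_cv.notify_all();  // notification fires early
    //   waiter:     calls m_cv.wait(lock) and sleeps, with no one left to wake it
    // Acquiring m_mutex first forces terminate() to wait until the waiter is actually
    // blocked inside m_cv.wait() (which releases m_mutex), so notify_all() cannot fire
    // in the gap between the waiter's predicate check and its wait.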
diff --git a/hailort/common/device_measurements.cpp b/hailort/common/device_measurements.cpp
index d27966a6..ae15885f 100644
--- a/hailort/common/device_measurements.cpp
+++ b/hailort/common/device_measurements.cpp
@@ -134,7 +134,7 @@ hailo_status PowerMeasurement::start_measurement()
     CHECK_SUCCESS(status, "Failed to start power measurement");
 
     m_is_thread_running = true;
-    m_thread = std::thread([this] () {
+    m_thread = std::thread([this] () -> hailo_status {
         const bool clear_power_measurement_history = true;
         while (m_is_thread_running.load()) {
             std::this_thread::sleep_for(DEFAULT_MEASUREMENTS_INTERVAL);
diff --git a/hailort/common/os/posix/os_utils.cpp b/hailort/common/os/posix/os_utils.cpp
index 6ea226e1..3e73605a 100644
--- a/hailort/common/os/posix/os_utils.cpp
+++ b/hailort/common/os/posix/os_utils.cpp
@@ -100,7 +100,7 @@ size_t OsUtils::get_dma_able_alignment()
     // TODO: implement on qnx (HRT-12356) - only needed when async api is implemented on qnx
     // TODO - URT-13534 - use sys call for QNX OS to get page size
 #elif defined(__QNX__)
-    return OS_UTILS__QNX_PAGE_SIZE
+    return OS_UTILS__QNX_PAGE_SIZE;
 #endif
 }
 
diff --git a/hailort/common/os/posix/socket.cpp b/hailort/common/os/posix/socket.cpp
index 4a964fb2..0260e9be 100644
--- a/hailort/common/os/posix/socket.cpp
+++ b/hailort/common/os/posix/socket.cpp
@@ -230,7 +230,7 @@ hailo_status Socket::send_to(const uint8_t *src_buffer, size_t src_buffer_size,
     } else if (EPIPE == errno) {
         // When socket is aborted from another thread sendto will return errno EPIPE
         LOGGER__INFO("Udp send aborted!");
-        return HAILO_STREAM_ABORTED_BY_USER;
+        return HAILO_STREAM_ABORT;
     } else {
         LOGGER__ERROR("Udp failed to send data, errno:{}.", errno);
         return HAILO_ETH_SEND_FAILURE;
@@ -272,7 +272,7 @@ hailo_status Socket::recv_from(uint8_t *dest_buffer, size_t dest_buffer_size, in
     }
     else if ((0 == number_of_received_bytes) && (0 != dest_buffer_size)) {
         LOGGER__INFO("Udp socket was aborted");
-        return HAILO_STREAM_ABORTED_BY_USER;
+        return HAILO_STREAM_ABORT;
     }
 
     if (result_src_addr_size > src_addr_size) {
diff --git a/hailort/common/runtime_statistics_internal.hpp b/hailort/common/runtime_statistics_internal.hpp
index 707a7079..c089fda8 100644
--- a/hailort/common/runtime_statistics_internal.hpp
+++ b/hailort/common/runtime_statistics_internal.hpp
@@ -15,10 +15,69 @@
 #include
 #include
 #include
+#include <sstream>
+#include <iomanip>
+#include <string>
 
 namespace hailort
 {
 
+class AccumulatorResultsHelper final
+{
+public:
+    AccumulatorResultsHelper() = delete;
+
+    static const uint32_t DEFAULT_FLOATING_POINT_PRECISION = 4;
+
+    static std::string format_results(const AccumulatorResults &results, bool verbose = false,
+        uint32_t precision = DEFAULT_FLOATING_POINT_PRECISION)
+    {
+        std::stringstream stream;
+        stream << format_statistic(results.count(), "count") << ", ";
+        stream << format_statistic(results.mean(), "mean", precision);
+        if (verbose) {
+            stream << ", ";
+            stream << format_statistic(results.min(), "min", precision) << ", ";
+            stream << format_statistic(results.max(), "max", precision) << ", ";
+            stream << format_statistic(results.var(), "var", precision) << ", ";
+            stream << format_statistic(results.sd(), "sd", precision) << ", ";
+            stream << format_statistic(results.mean_sd(), "mean_sd", precision);
+        }
+        return stream.str();
+    }
+
+    static std::string format_statistic(const Expected<double> &statistic, const std::string &name = "",
+        uint32_t precision = DEFAULT_FLOATING_POINT_PRECISION)
+    {
+        return format_statistic<double>(statistic, name, precision);
+    }
+
+    static std::string format_statistic(const Expected<size_t> &statistic, const std::string &name = "")
+    {
+        return format_statistic<size_t>(statistic, name);
+    }
+
+private:
+    template <typename T, std::enable_if_t<std::is_arithmetic<T>::value, int> = 0>
+    static std::string format_statistic(const Expected<T> &statistic, const std::string &name,
+        uint32_t precision = DEFAULT_FLOATING_POINT_PRECISION)
+    {
+        static const std::string NO_VALUE = "-";
+        std::stringstream stream;
+        if (!name.empty()) {
+            stream << name << "=";
+        }
+
+        if (statistic.has_value()) {
+            stream << std::fixed << std::setprecision(precision) << statistic.value();
+        } else {
+            stream << NO_VALUE;
+        }
+
+        return stream.str();
+    }
+};
+
 template <typename T, std::enable_if_t<std::is_arithmetic<T>::value, int> = 0>
 class FullAccumulator : public Accumulator<T>
 {
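For reference, a usage sketch of the helper above (the accumulator getter and the numbers are hypothetical; the output format is exactly what format_results() produces):

    // AccumulatorResults results = latency_accumulator->get();
    // LOGGER__INFO("latency: {}", AccumulatorResultsHelper::format_results(results, true /* verbose */));
    // Possible output: "count=100, mean=2.5310, min=2.1044, max=9.8021, var=0.8312, sd=0.9117, mean_sd=0.0912"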
diff --git a/hailort/common/utils.hpp b/hailort/common/utils.hpp
index 57046ed5..dfde750f 100644
--- a/hailort/common/utils.hpp
+++ b/hailort/common/utils.hpp
@@ -12,10 +12,13 @@
 #ifndef HAILO_UTILS_H_
 #define HAILO_UTILS_H_
 
-#include <hailo/hailort.h>
-#include <hailo/expected.hpp>
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+
 #include "common/logger_macros.hpp"
 
 #include
+
+#include
 #include
 #include
 #include
@@ -166,6 +169,17 @@ _ISEMPTY( \
 
 #define CONSTRUCT_MSG(dft_fmt, ...) _CONSTRUCT_MSG(ISEMPTY(__VA_ARGS__), dft_fmt, "" __VA_ARGS__)
 
+inline hailo_status get_status(hailo_status status)
+{
+    return status;
+}
+
+template <typename T>
+inline hailo_status get_status(const Expected<T> &exp)
+{
+    return exp.status();
+}
+
 #define _CHECK(cond, ret_val, ...) \
     do { \
         if (!(cond)) { \
@@ -175,39 +189,31 @@ _ISEMPTY( \
     } while(0)
 
 /** Returns ret_val when cond is false */
-#define CHECK(cond, ret_val, ...) _CHECK((cond), (ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__))
-#define CHECK_AS_EXPECTED(cond, ret_val, ...) \
-    _CHECK((cond), (make_unexpected(ret_val)), CONSTRUCT_MSG("CHECK_AS_EXPECTED failed", ##__VA_ARGS__))
-
-#define CHECK_ARG_NOT_NULL(arg) _CHECK(nullptr != (arg), HAILO_INVALID_ARGUMENT, "CHECK_ARG_NOT_NULL for {} failed", #arg)
+#define CHECK(cond, ret_val, ...) \
+    _CHECK((cond), make_unexpected(ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__))
+#define CHECK_AS_EXPECTED CHECK
 
-#define CHECK_ARG_NOT_NULL_AS_EXPECTED(arg) _CHECK(nullptr != (arg), make_unexpected(HAILO_INVALID_ARGUMENT), "CHECK_ARG_NOT_NULL_AS_EXPECTED for {} failed", #arg)
+#define CHECK_ARG_NOT_NULL(arg) _CHECK(nullptr != (arg), make_unexpected(HAILO_INVALID_ARGUMENT), "CHECK_ARG_NOT_NULL for {} failed", #arg)
+#define CHECK_ARG_NOT_NULL_AS_EXPECTED CHECK_ARG_NOT_NULL
 
-#define CHECK_NOT_NULL(arg, status) _CHECK(nullptr != (arg), status, "CHECK_NOT_NULL for {} failed", #arg)
+#define CHECK_NOT_NULL(arg, status) _CHECK(nullptr != (arg), make_unexpected(status), "CHECK_NOT_NULL for {} failed", #arg)
+#define CHECK_NOT_NULL_AS_EXPECTED CHECK_NOT_NULL
 
-#define CHECK_NOT_NULL_AS_EXPECTED(arg, status) _CHECK(nullptr != (arg), make_unexpected(status), "CHECK_NOT_NULL_AS_EXPECTED for {} failed", #arg)
-
-#define _CHECK_SUCCESS(status, is_default, fmt, ...) \
+#define _CHECK_SUCCESS(res, is_default, fmt, ...) \
     do { \
-        const auto &__check_success_status = (status); \
+        const auto &__check_success_status = get_status(res); \
         _CHECK( \
-            HAILO_SUCCESS == __check_success_status, \
-            __check_success_status, \
+            (HAILO_SUCCESS == __check_success_status), \
+            make_unexpected(__check_success_status), \
             _CONSTRUCT_MSG(is_default, "CHECK_SUCCESS failed with status={}", fmt, __check_success_status, ##__VA_ARGS__) \
         ); \
     } while(0)
 #define CHECK_SUCCESS(status, ...) _CHECK_SUCCESS(status, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+#define CHECK_SUCCESS_AS_EXPECTED CHECK_SUCCESS
 
-#define _CHECK_SUCCESS_AS_EXPECTED(status, is_default, fmt, ...) \
-    do { \
-        const auto &__check_success_status = (status); \
-        _CHECK( \
-            HAILO_SUCCESS == __check_success_status, \
-            make_unexpected(__check_success_status), \
-            _CONSTRUCT_MSG(is_default, "CHECK_SUCCESS_AS_EXPECTED failed with status={}", fmt, __check_success_status, ##__VA_ARGS__) \
-        ); \
-    } while(0)
-#define CHECK_SUCCESS_AS_EXPECTED(status, ...) _CHECK_SUCCESS_AS_EXPECTED(status, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
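The get_status() overloads are what allow one macro to serve both variants: _CHECK_SUCCESS now accepts either a hailo_status or an Expected<T>. Note that the unified macros return make_unexpected() even in status-returning functions, which implies hailort's Unexpected converts back to hailo_status (otherwise the pervasive uses of CHECK in such functions could not compile). A usage sketch; do_work() and make_buffer() are hypothetical:

    hailo_status example()
    {
        hailo_status status = do_work();          // hypothetical call returning a status
        CHECK_SUCCESS(status);                    // plain status path

        Expected<Buffer> buffer = make_buffer();  // hypothetical call returning an Expected
        CHECK_SUCCESS(buffer);                    // Expected<T> path, via get_status(exp)

        return HAILO_SUCCESS;
    }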
+#define _CHECK_EXPECTED _CHECK_SUCCESS
+#define CHECK_EXPECTED(obj, ...) _CHECK_EXPECTED(obj, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+#define CHECK_EXPECTED_AS_STATUS CHECK_EXPECTED
 
 // Define macro CHECK_IN_DEBUG - that checks cond in debug with CHECK macro but in release does nothing and will get optimized out
 #ifdef NDEBUG
@@ -258,28 +264,30 @@ _ISEMPTY( \
 #define CHECK_GRPC_STATUS_AS_EXPECTED(status) _CHECK_GRPC_STATUS(status, make_unexpected(HAILO_RPC_FAILED), SERVICE_WARNING_MSG)
 #endif
 
-#define _CHECK_EXPECTED(obj, is_default, fmt, ...) \
-    do { \
-        const auto &__check_expected_obj = (obj); \
-        _CHECK( \
-            __check_expected_obj.has_value(), \
-            make_unexpected(__check_expected_obj.status()), \
-            _CONSTRUCT_MSG(is_default, "CHECK_EXPECTED failed with status={}", fmt, __check_expected_obj.status(), ##__VA_ARGS__) \
-        ); \
-    } while(0)
-#define CHECK_EXPECTED(obj, ...) _CHECK_EXPECTED(obj, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+#define __HAILO_CONCAT(x, y) x ## y
+#define _HAILO_CONCAT(x, y) __HAILO_CONCAT(x, y)
 
+#define _TRY(expected_var_name, var_decl, expr, ...) \
+    auto expected_var_name = (expr); \
+    CHECK_EXPECTED(expected_var_name, __VA_ARGS__); \
+    var_decl = expected_var_name.release()
 
-#define _CHECK_EXPECTED_AS_STATUS(obj, is_default, fmt, ...) \
-    do { \
-        const auto &__check_expected_obj = (obj); \
-        _CHECK( \
-            __check_expected_obj.has_value(), \
-            __check_expected_obj.status(), \
-            _CONSTRUCT_MSG(is_default, "CHECK_EXPECTED_AS_STATUS failed with status={}", fmt, __check_expected_obj.status(), ##__VA_ARGS__) \
-        ); \
-    } while(0)
-#define CHECK_EXPECTED_AS_STATUS(obj, ...) _CHECK_EXPECTED_AS_STATUS(obj, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+/**
+ * The TRY macro is used to allow easier validation and access for variables returned as Expected.
+ * If the expression returns an Expected with status HAILO_SUCCESS, the macro will release the expected and assign
+ * the var_decl.
+ * Otherwise, the macro will cause the current function to return the failed status.
+ *
+ * Usage example:
+ *
+ * Expected<int> func() {
+ *     TRY(auto var, return_5());
+ *     // Now var is int with value 5
+ *
+ *     // func will return Unexpected with status HAILO_INTERNAL_FAILURE
+ *     TRY(auto var2, return_error(HAILO_INTERNAL_FAILURE), "Failed doing stuff {}", 5);
+ * }
+ */
+#define TRY(var_decl, expr, ...) 
_TRY(_HAILO_CONCAT(__expected, __COUNTER__), var_decl, expr, __VA_ARGS__) #ifndef _MSC_VER #define IGNORE_DEPRECATION_WARNINGS_BEGIN _Pragma("GCC diagnostic push") \ diff --git a/hailort/drivers/common/hailo_ioctl_common.h b/hailort/drivers/common/hailo_ioctl_common.h index 4d7dc6c4..0911f422 100644 --- a/hailort/drivers/common/hailo_ioctl_common.h +++ b/hailort/drivers/common/hailo_ioctl_common.h @@ -14,7 +14,10 @@ #define SIZE_OF_VDMA_DESCRIPTOR (16) #define VDMA_DEST_CHANNELS_START (16) -#define CHANNEL_IRQ_TIMESTAMPS_SIZE (128 * 2) // Should be same as MAX_IRQ_TIMESTAMPS_SIZE (hailort_driver.hpp) +#define HAILO_VDMA_MAX_ONGOING_TRANSFERS (128) +#define HAILO_VDMA_MAX_ONGOING_TRANSFERS_MASK (HAILO_VDMA_MAX_ONGOING_TRANSFERS - 1) + +#define CHANNEL_IRQ_TIMESTAMPS_SIZE (HAILO_VDMA_MAX_ONGOING_TRANSFERS * 2) #define CHANNEL_IRQ_TIMESTAMPS_SIZE_MASK (CHANNEL_IRQ_TIMESTAMPS_SIZE - 1) #define INVALID_DRIVER_HANDLE_VALUE ((uintptr_t)-1) @@ -35,14 +38,13 @@ typedef ULONG uint32_t; typedef UCHAR uint8_t; typedef USHORT uint16_t; typedef ULONGLONG uint64_t; -typedef uint64_t u64; -typedef uint32_t u32; -typedef uint16_t u16; -typedef uint8_t u8; #endif /* !defined(__cplusplus) && defined(NTDDI_VERSION) */ #ifdef _MSC_VER + +#include + #if !defined(bool) && !defined(__cplusplus) typedef uint8_t bool; #endif // !defined(bool) && !defined(__cplusplus) @@ -51,6 +53,48 @@ typedef uint8_t bool; #define INT_MAX 0x7FFFFFFF #endif // !defined(INT_MAX) + +// {d88d31f1-fede-4e71-ac2a-6ce0018c1501} +DEFINE_GUID (GUID_DEVINTERFACE_HailoKM, + 0xd88d31f1,0xfede,0x4e71,0xac,0x2a,0x6c,0xe0,0x01,0x8c,0x15,0x01); + +#define HAILO_GENERAL_IOCTL_MAGIC 0 +#define HAILO_VDMA_IOCTL_MAGIC 1 +#define HAILO_NON_LINUX_IOCTL_MAGIC 2 + +#define HAILO_IOCTL_COMPATIBLE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x802, METHOD_BUFFERED, FILE_ANY_ACCESS) + + +typedef struct tCompatibleHailoIoctlParam +{ + union { + struct { + ULONG Size : 16; + ULONG Code : 8; + ULONG Type : 6; + ULONG Read : 1; + ULONG Write : 1; + } bits; + ULONG value; + } u; +} tCompatibleHailoIoctlParam; + +static ULONG FORCEINLINE _IOC_(ULONG nr, ULONG type, ULONG size, bool read, bool write) +{ + struct tCompatibleHailoIoctlParam param; + param.u.bits.Code = nr; + param.u.bits.Size = size; + param.u.bits.Type = type; + param.u.bits.Read = read ? 1 : 0; + param.u.bits.Write = write ? 
1 : 0;
+    return param.u.value;
+}
+
+#define _IOW_(type,nr,size) _IOC_(nr, type, sizeof(size), true, false)
+#define _IOR_(type,nr,size) _IOC_(nr, type, sizeof(size), false, true)
+#define _IOWR_(type,nr,size) _IOC_(nr, type, sizeof(size), true, true)
+#define _IO_(type,nr) _IOC_(nr, type, 0, false, false)
+
 #elif defined(__linux__) // #ifdef _MSC_VER
 #ifndef __KERNEL__
 // include the userspace headers only if this file is included by user space program
@@ -149,11 +193,17 @@ struct hailo_vdma_buffer_unmap_params {
 
 /* structure used in ioctl HAILO_DESC_LIST_CREATE */
 struct hailo_desc_list_create_params {
     size_t desc_count;          // in
+    uint16_t desc_page_size;    // in
     bool is_circular;           // in
     uintptr_t desc_handle;      // out
     uint64_t dma_address;       // out
 };
 
+/* structure used in ioctl HAILO_DESC_LIST_RELEASE */
+struct hailo_desc_list_release_params {
+    uintptr_t desc_handle;      // in
+};
+
 /* structure used in ioctl HAILO_NON_LINUX_DESC_LIST_MMAP */
 struct hailo_non_linux_desc_list_mmap_params {
     uintptr_t desc_handle;  // in
@@ -164,8 +214,9 @@ struct hailo_non_linux_desc_list_mmap_params {
 /* structure used in ioctl HAILO_DESC_LIST_BIND_VDMA_BUFFER */
 struct hailo_desc_list_bind_vdma_buffer_params {
     size_t buffer_handle;       // in
+    size_t buffer_size;         // in
+    size_t buffer_offset;       // in
     uintptr_t desc_handle;      // in
-    uint16_t desc_page_size;    // in
     uint8_t channel_index;      // in
     uint32_t starting_desc;     // in
 };
@@ -189,6 +240,7 @@ struct hailo_vdma_interrupts_channel_data {
     uint16_t host_num_processed;
     uint8_t host_error;             // Channel errors bits on source side
     uint8_t device_error;           // Channel errors bits on dest side
+    bool validation_success;        // Whether the validation of the channel was successful
 };
 
 struct hailo_vdma_interrupts_wait_params {
@@ -272,26 +324,6 @@ struct hailo_memory_transfer_params {
     uint8_t buffer[MAX_MEMORY_TRANSFER_LENGTH];     // in/out
 };
 
-/* structure used in ioctl HAILO_VDMA_CHANNEL_READ_REGISTER */
-struct hailo_vdma_channel_read_register_params {
-    uint8_t engine_index;                   // in
-    uint8_t channel_index;                  // in
-    enum hailo_dma_data_direction direction; // in
-    size_t offset;                          // in
-    size_t reg_size;                        // in, can be either 1, 2 or 4
-    uint32_t data;                          // out
-};
-
-/* structure used in ioctl HAILO_VDMA_CHANNEL_WRITE_REGISTER */
-struct hailo_vdma_channel_write_register_params {
-    uint8_t engine_index;                   // in
-    uint8_t channel_index;                  // in
-    enum hailo_dma_data_direction direction; // in
-    size_t offset;                          // in
-    size_t reg_size;                        // in, can be either 1, 2 or 4
-    uint32_t data;                          // in
-};
-
 /* structure used in ioctl HAILO_VDMA_BUFFER_SYNC */
 enum hailo_vdma_buffer_sync_type {
     HAILO_SYNC_FOR_CPU,
@@ -362,21 +394,103 @@ struct hailo_read_log_params {
     size_t read_bytes;      // out
 };
 
+/* structure used in ioctl HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC */
 struct hailo_allocate_low_memory_buffer_params {
     size_t buffer_size;         // in
     uintptr_t buffer_handle;    // out
 };
 
+/* structure used in ioctl HAILO_VDMA_LOW_MEMORY_BUFFER_FREE */
+struct hailo_free_low_memory_buffer_params {
+    uintptr_t buffer_handle;    // in
+};
+
 struct hailo_mark_as_in_use_params {
     bool in_use;                // out
 };
 
+/* structure used in ioctl HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC */
 struct hailo_allocate_continuous_buffer_params {
     size_t buffer_size;         // in
     uintptr_t buffer_handle;    // out
     uint64_t dma_address;       // out
 };
 
+/* structure used in ioctl HAILO_VDMA_CONTINUOUS_BUFFER_FREE */
+struct hailo_free_continuous_buffer_params {
+    uintptr_t buffer_handle;    // in
+};
+
+/* structures used in ioctl HAILO_VDMA_LAUNCH_TRANSFER */
+struct hailo_vdma_transfer_buffer {
+    size_t mapped_buffer_handle;    // in
+    uint32_t offset;                // in
+    uint32_t size;                  // in
+};
+
+enum hailo_vdma_interrupts_domain {
+    HAILO_VDMA_INTERRUPTS_DOMAIN_NONE   = 0,
+    HAILO_VDMA_INTERRUPTS_DOMAIN_DEVICE = (1 << 0),
+    HAILO_VDMA_INTERRUPTS_DOMAIN_HOST   = (1 << 1),
+
+    /** Max enum value to maintain ABI Integrity */
+    HAILO_VDMA_INTERRUPTS_DOMAIN_MAX_ENUM = INT_MAX,
+};
+
+// We allow maximum 2 buffers per transfer since we may have an extra buffer
+// to make sure each buffer is aligned to page size.
+#define HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER (2)
+
+struct hailo_vdma_launch_transfer_params {
+    uint8_t engine_index;           // in
+    uint8_t channel_index;          // in
+
+    uintptr_t desc_handle;          // in
+    uint32_t starting_desc;         // in
+
+    bool should_bind;               // in, if false, assumes buffer already bound.
+    uint8_t buffers_count;          // in
+    struct hailo_vdma_transfer_buffer
+        buffers[HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER]; // in
+
+    enum hailo_vdma_interrupts_domain first_interrupts_domain; // in
+    enum hailo_vdma_interrupts_domain last_interrupts_domain;  // in
+
+    bool is_debug;                  // in, if set, program hw to send
+                                    // more info (e.g. desc complete status)
+
+    uint32_t descs_programed;       // out, number of descriptors programmed.
+};
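A sketch of how user space might drive the new launch-transfer ioctl (Linux side; the file descriptor and handles are hypothetical and would come from earlier HAILO_DESC_LIST_CREATE / HAILO_VDMA_BUFFER_MAP calls):

    #include <sys/ioctl.h>

    static int launch_one_transfer(int fd, uintptr_t desc_handle, size_t buf_handle, uint32_t transfer_size)
    {
        struct hailo_vdma_launch_transfer_params params = {0};
        params.engine_index = 0;
        params.channel_index = 0;
        params.desc_handle = desc_handle;
        params.starting_desc = 0;
        params.should_bind = true;                 /* bind the buffer as part of the launch */
        params.buffers_count = 1;
        params.buffers[0].mapped_buffer_handle = buf_handle;
        params.buffers[0].offset = 0;
        params.buffers[0].size = transfer_size;
        params.first_interrupts_domain = HAILO_VDMA_INTERRUPTS_DOMAIN_NONE;
        params.last_interrupts_domain = HAILO_VDMA_INTERRUPTS_DOMAIN_HOST; /* interrupt the host on completion */
        params.is_debug = false;
        return ioctl(fd, HAILO_VDMA_LAUNCH_TRANSFER, &params);
    }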
+#ifdef _MSC_VER
+struct tCompatibleHailoIoctlData
+{
+    tCompatibleHailoIoctlParam Parameters;
+    ULONG_PTR Value;
+    union {
+        struct hailo_memory_transfer_params MemoryTransfer;
+        struct hailo_vdma_interrupts_enable_params VdmaInterruptsEnable;
+        struct hailo_vdma_interrupts_disable_params VdmaInterruptsDisable;
+        struct hailo_vdma_interrupts_read_timestamp_params VdmaInterruptsReadTimestamps;
+        struct hailo_vdma_interrupts_wait_params VdmaInterruptsWait;
+        struct hailo_vdma_buffer_sync_params VdmaBufferSync;
+        struct hailo_fw_control FirmwareControl;
+        struct hailo_vdma_buffer_map_params VdmaBufferMap;
+        struct hailo_vdma_buffer_unmap_params VdmaBufferUnmap;
+        struct hailo_desc_list_create_params DescListCreate;
+        struct hailo_desc_list_release_params DescListReleaseParam;
+        struct hailo_desc_list_bind_vdma_buffer_params DescListBind;
+        struct hailo_d2h_notification D2HNotification;
+        struct hailo_device_properties DeviceProperties;
+        struct hailo_driver_info DriverInfo;
+        struct hailo_non_linux_desc_list_mmap_params DescListMmap;
+        struct hailo_read_log_params ReadLog;
+        struct hailo_mark_as_in_use_params MarkAsInUse;
+        struct hailo_vdma_launch_transfer_params LaunchTransfer;
+    } Buffer;
+};
+#endif // _MSC_VER
+
 #pragma pack(pop)
 
 enum hailo_general_ioctl_code {
@@ -407,8 +521,6 @@ enum hailo_vdma_ioctl_code {
     HAILO_VDMA_INTERRUPTS_DISABLE_CODE,
     HAILO_VDMA_INTERRUPTS_WAIT_CODE,
     HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS_CODE,
-    HAILO_VDMA_CHANNEL_READ_REGISTER_CODE,
-    HAILO_VDMA_CHANNEL_WRITE_REGISTER_CODE,
     HAILO_VDMA_BUFFER_MAP_CODE,
     HAILO_VDMA_BUFFER_UNMAP_CODE,
     HAILO_VDMA_BUFFER_SYNC_CODE,
@@ -420,6 +532,7 @@ enum hailo_vdma_ioctl_code {
     HAILO_MARK_AS_IN_USE_CODE,
     HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC_CODE,
     HAILO_VDMA_CONTINUOUS_BUFFER_FREE_CODE,
+    HAILO_VDMA_LAUNCH_TRANSFER_CODE,
 
     // Must be last
     HAILO_VDMA_IOCTL_MAX_NR,
@@ -430,24 +543,23 @@ enum hailo_vdma_ioctl_code {
 #define HAILO_VDMA_INTERRUPTS_WAIT            _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_INTERRUPTS_WAIT_CODE, struct hailo_vdma_interrupts_wait_params)
 #define HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS_CODE, struct hailo_vdma_interrupts_read_timestamp_params)
 
-#define HAILO_VDMA_CHANNEL_READ_REGISTER      _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CHANNEL_READ_REGISTER_CODE, 
struct hailo_vdma_channel_read_register_params) -#define HAILO_VDMA_CHANNEL_WRITE_REGISTER _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CHANNEL_WRITE_REGISTER_CODE, struct hailo_vdma_channel_write_register_params) - #define HAILO_VDMA_BUFFER_MAP _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_BUFFER_MAP_CODE, struct hailo_vdma_buffer_map_params) #define HAILO_VDMA_BUFFER_UNMAP _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_BUFFER_UNMAP_CODE, struct hailo_vdma_buffer_unmap_params) #define HAILO_VDMA_BUFFER_SYNC _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_BUFFER_SYNC_CODE, struct hailo_vdma_buffer_sync_params) #define HAILO_DESC_LIST_CREATE _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_DESC_LIST_CREATE_CODE, struct hailo_desc_list_create_params) -#define HAILO_DESC_LIST_RELEASE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_DESC_LIST_RELEASE_CODE, uintptr_t) +#define HAILO_DESC_LIST_RELEASE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_DESC_LIST_RELEASE_CODE, struct hailo_desc_list_release_params) #define HAILO_DESC_LIST_BIND_VDMA_BUFFER _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_DESC_LIST_BIND_VDMA_BUFFER_CODE, struct hailo_desc_list_bind_vdma_buffer_params) #define HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC_CODE, struct hailo_allocate_low_memory_buffer_params) -#define HAILO_VDMA_LOW_MEMORY_BUFFER_FREE _IO_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LOW_MEMORY_BUFFER_FREE_CODE) +#define HAILO_VDMA_LOW_MEMORY_BUFFER_FREE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LOW_MEMORY_BUFFER_FREE_CODE, struct hailo_free_low_memory_buffer_params) #define HAILO_MARK_AS_IN_USE _IOW_(HAILO_VDMA_IOCTL_MAGIC, HAILO_MARK_AS_IN_USE_CODE, struct hailo_mark_as_in_use_params) #define HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC_CODE, struct hailo_allocate_continuous_buffer_params) -#define HAILO_VDMA_CONTINUOUS_BUFFER_FREE _IO_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_FREE_CODE) +#define HAILO_VDMA_CONTINUOUS_BUFFER_FREE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_FREE_CODE, struct hailo_free_continuous_buffer_params) + +#define HAILO_VDMA_LAUNCH_TRANSFER _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LAUNCH_TRANSFER_CODE, struct hailo_vdma_launch_transfer_params) enum hailo_non_linux_ioctl_code { diff --git a/hailort/drivers/win/include/Public.h b/hailort/drivers/win/include/Public.h index 8dfecf63..6181033e 100644 --- a/hailort/drivers/win/include/Public.h +++ b/hailort/drivers/win/include/Public.h @@ -15,13 +15,9 @@ Module Name: --*/ -// -// Define an Interface Guid so that apps can find the device and talk to it. 
-// +#ifndef _HAILO_PUBLIC_H_ +#define _HAILO_PUBLIC_H_ -DEFINE_GUID (GUID_DEVINTERFACE_HailoKM, - 0xd88d31f1,0xfede,0x4e71,0xac,0x2a,0x6c,0xe0,0x01,0x8c,0x15,0x01); -// {d88d31f1-fede-4e71-ac2a-6ce0018c1501} #define HAILO_IOCTL_COMMON CTL_CODE(FILE_DEVICE_UNKNOWN, 0x801, METHOD_BUFFERED, FILE_ANY_ACCESS) #define IOCTL_FUNC(x) (((x) >> 2) & 0xfff) @@ -57,69 +53,7 @@ struct tCommonHailoIoctlParam #define HAILO_CMD_FREE_MEMORY 0x0060 #define HAILO_CMD_ALLOC_MEMORY 0x0061 -#define HAILO_IOCTL_COMPATIBLE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x802, METHOD_BUFFERED, FILE_ANY_ACCESS) -struct tCompatibleHailoIoctlParam -{ - union { - struct { - ULONG Size : 16; - ULONG Code : 8; - ULONG Type : 6; - ULONG Read : 1; - ULONG Write : 1; - } bits; - ULONG value; - } u; -}; - -#define HAILO_GENERAL_IOCTL_MAGIC 0 -#define HAILO_VDMA_IOCTL_MAGIC 1 -#define HAILO_NON_LINUX_IOCTL_MAGIC 2 - - - -static ULONG FORCEINLINE _IOC_(ULONG nr, ULONG type, ULONG size, bool read, bool write) -{ - tCompatibleHailoIoctlParam param; - param.u.bits.Code = nr; - param.u.bits.Size = size; - param.u.bits.Type = type; - param.u.bits.Read = read ? 1 : 0; - param.u.bits.Write = write ? 1 : 0; - return param.u.value; -} - -#define _IOW_(type,nr,size) _IOC_(nr, type, sizeof(size), true, false) -#define _IOR_(type,nr,size) _IOC_(nr, type, sizeof(size), false, true) -#define _IOWR_(type,nr,size) _IOC_(nr, type, sizeof(size), true, true) -#define _IO_(type,nr) _IOC_(nr, type, 0, false, false) #include "..\..\common\hailo_ioctl_common.h" -struct tCompatibleHailoIoctlData -{ - tCompatibleHailoIoctlParam Parameters; - ULONG_PTR Value; - union { - hailo_memory_transfer_params MemoryTransfer; - hailo_vdma_interrupts_enable_params VdmaInterruptsEnable; - hailo_vdma_interrupts_disable_params VdmaInterruptsDisable; - hailo_vdma_interrupts_read_timestamp_params VdmaInterruptsReadTimestamps; - hailo_vdma_interrupts_wait_params VdmaInterruptsWait; - hailo_vdma_buffer_sync_params VdmaBufferSync; - hailo_fw_control FirmwareControl; - hailo_vdma_buffer_map_params VdmaBufferMap; - hailo_vdma_buffer_unmap_params VdmaBufferUnmap; - hailo_desc_list_create_params DescListCreate; - uintptr_t DescListReleaseParam; - hailo_desc_list_bind_vdma_buffer_params DescListBind; - hailo_d2h_notification D2HNotification; - hailo_device_properties DeviceProperties; - hailo_driver_info DriverInfo; - hailo_vdma_channel_read_register_params ChannelRegisterRead; - hailo_vdma_channel_write_register_params ChannelRegisterWrite; - hailo_non_linux_desc_list_mmap_params DescListMmap; - hailo_read_log_params ReadLog; - hailo_mark_as_in_use_params MarkAsInUse; - } Buffer; -}; +#endif /* _HAILO_PUBLIC_H_ */ \ No newline at end of file diff --git a/hailort/hailort_service/CMakeLists.txt b/hailort/hailort_service/CMakeLists.txt index d302b2ee..a470a3dd 100644 --- a/hailort/hailort_service/CMakeLists.txt +++ b/hailort/hailort_service/CMakeLists.txt @@ -12,6 +12,7 @@ endif() add_executable(hailort_service hailort_rpc_service.cpp + cng_buffer_pool.cpp service_resource_manager.hpp ${HAILORT_SERVICE_OS_DIR}/hailort_service.cpp ${HAILORT_COMMON_CPP_SOURCES} diff --git a/hailort/hailort_service/cng_buffer_pool.cpp b/hailort/hailort_service/cng_buffer_pool.cpp new file mode 100644 index 00000000..b0541646 --- /dev/null +++ b/hailort/hailort_service/cng_buffer_pool.cpp @@ -0,0 +1,163 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file cng_buffer_pool.cpp
+ * @brief Network group buffer pool implementation
+ **/
+
+#include "cng_buffer_pool.hpp"
+#include "service_resource_manager.hpp"
+#include "hailo/hailort.h"
+
+namespace hailort
+{
+
+
+Expected<std::shared_ptr<ServiceStreamBufferPool>> ServiceStreamBufferPool::create(uint32_t vdevice_handle,
+    size_t buffer_size, size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event)
+{
+    auto map_buffer_lambda = [direction](std::shared_ptr<VDevice> vdevice, BufferPtr buffer) {
+        return DmaMappedBuffer::create(*vdevice, buffer->data(), buffer->size(), direction);
+    };
+    auto &vdevice_manager = ServiceResourceManager<VDevice>::get_instance();
+
+    auto free_buffers_queue = SpscQueue<BufferPtr>::create(buffer_count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT);
+    CHECK_EXPECTED(free_buffers_queue);
+
+    std::vector<AllocatedMappedBuffer> buffers;
+    buffers.reserve(buffer_count);
+    for (size_t i = 0; i < buffer_count; i++) {
+        auto buffer = Buffer::create_shared(buffer_size, BufferStorageParams::create_dma());
+        CHECK_EXPECTED(buffer);
+
+        auto mapped_buffer = vdevice_manager.execute<Expected<DmaMappedBuffer>>(vdevice_handle, map_buffer_lambda, buffer.value());
+        CHECK_EXPECTED(mapped_buffer);
+
+        auto status = free_buffers_queue->enqueue(buffer.value());
+        CHECK_SUCCESS(status);
+
+        buffers.emplace_back(AllocatedMappedBuffer{ buffer.release(), mapped_buffer.release()});
+    }
+
+    auto buffer_pool_ptr = make_shared_nothrow<ServiceStreamBufferPool>(buffer_size, std::move(buffers),
+        free_buffers_queue.release(), buffer_count);
+    CHECK_NOT_NULL_AS_EXPECTED(buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    return buffer_pool_ptr;
+}
+
+ServiceStreamBufferPool::ServiceStreamBufferPool(size_t buffer_size, std::vector<AllocatedMappedBuffer> &&buffers,
+    SpscQueue<BufferPtr> &&free_buffers_queue, size_t buffers_count) :
+    m_buffer_size(buffer_size),
+    m_buffers_count(buffers_count),
+    m_buffers(std::move(buffers)),
+    m_free_buffers_queue(std::move(free_buffers_queue))
+{}
+
+Expected<BufferPtr> ServiceStreamBufferPool::acquire_buffer()
+{
+    auto buffer = m_free_buffers_queue.dequeue(DEFAULT_TRANSFER_TIMEOUT);
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
+        return make_unexpected(buffer.status());
+    }
+    else if (HAILO_TIMEOUT == buffer.status()) {
+        LOGGER__WARNING(
+            "Failed to acquire buffer because the buffer pool is empty. This could be caused by uneven reading and writing speeds");
+        return make_unexpected(buffer.status());
+    }
+    CHECK_EXPECTED(buffer);
+
+    return buffer.release();
+}
+hailo_status ServiceStreamBufferPool::return_to_pool(BufferPtr buffer)
+{
+    CHECK(buffer->size() == m_buffer_size, HAILO_INTERNAL_FAILURE,
+        "Buffer size is not the same as expected for pool! ({} != {})", buffer->size(), m_buffer_size);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto status = m_free_buffers_queue.enqueue(buffer);
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+size_t ServiceStreamBufferPool::buffers_count()
+{
+    return m_buffers_count;
+}
+
+Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> ServiceNetworkGroupBufferPool::create(uint32_t vdevice_handle)
+{
+    auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled);
+    CHECK_EXPECTED(shutdown_event_exp);
+    auto shutdown_event = shutdown_event_exp.release();
+
+    auto cng_buffer_pool_ptr = make_shared_nothrow<ServiceNetworkGroupBufferPool>(shutdown_event, vdevice_handle);
+    CHECK_NOT_NULL_AS_EXPECTED(cng_buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    return cng_buffer_pool_ptr;
+}
+
+ServiceNetworkGroupBufferPool::ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle) :
+    m_output_name_to_buffer_pool(), m_shutdown_event(shutdown_event), m_vdevice_handle(vdevice_handle)
+{}
+
+hailo_status ServiceNetworkGroupBufferPool::allocate_pool(const std::string &name, size_t frame_size, size_t pool_size)
+{
+    auto buffer_pool = ServiceStreamBufferPool::create(m_vdevice_handle, frame_size,
+        pool_size, HAILO_DMA_BUFFER_DIRECTION_D2H, m_shutdown_event);
+    CHECK_EXPECTED(buffer_pool);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    m_output_name_to_buffer_pool[name] = buffer_pool.release();
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status ServiceNetworkGroupBufferPool::reallocate_pool(const std::string &name, size_t frame_size)
+{
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto pool_size = m_output_name_to_buffer_pool[name]->buffers_count();
+    m_output_name_to_buffer_pool[name].reset();
+
+    auto buffer_pool = ServiceStreamBufferPool::create(m_vdevice_handle, frame_size,
+        pool_size, HAILO_DMA_BUFFER_DIRECTION_D2H, m_shutdown_event);
+    CHECK_EXPECTED(buffer_pool);
+    m_output_name_to_buffer_pool[name] = buffer_pool.release();
+
+    return HAILO_SUCCESS;
+}
+
+Expected<BufferPtr> ServiceNetworkGroupBufferPool::acquire_buffer(const std::string &output_name)
+{
+    CHECK_AS_EXPECTED(contains(m_output_name_to_buffer_pool, output_name), HAILO_INTERNAL_FAILURE,
+        "acquire_buffer() for output {} failed, output name does not exist in buffer pool", output_name);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto buffer = m_output_name_to_buffer_pool.at(output_name)->acquire_buffer();
+    CHECK_EXPECTED(buffer);
+
+    return buffer.release();
+}
+
+hailo_status ServiceNetworkGroupBufferPool::return_to_pool(const std::string &output_name, BufferPtr buffer)
+{
+    CHECK(contains(m_output_name_to_buffer_pool, output_name), HAILO_INTERNAL_FAILURE,
+        "return_to_pool() for output {} failed, output name does not exist in buffer pool", output_name);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto status = m_output_name_to_buffer_pool.at(output_name)->return_to_pool(buffer);
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status ServiceNetworkGroupBufferPool::shutdown()
+{
+    return m_shutdown_event->signal();
+}
+
+} /* namespace hailort */
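Taken together, the intended lifecycle of one D2H frame through these pools looks roughly like this (a sketch using the names defined above; the async read itself is schematic):

    // auto pool = ServiceNetworkGroupBufferPool::create(vdevice_handle).release();
    // pool->allocate_pool("output0", hw_frame_size, pool_size);    // once, at configure time
    //
    // auto buffer = pool->acquire_buffer("output0").release();     // blocks up to DEFAULT_TRANSFER_TIMEOUT
    // ... launch the async read into *buffer; when the transfer completes ...
    // pool->return_to_pool("output0", buffer);                     // back to the SPSC free queue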
diff --git a/hailort/hailort_service/cng_buffer_pool.hpp b/hailort/hailort_service/cng_buffer_pool.hpp
new file mode 100644
index 00000000..86172a3c
--- /dev/null
+++ b/hailort/hailort_service/cng_buffer_pool.hpp
@@ -0,0 +1,88 @@
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file cng_buffer_pool.hpp
+ * @brief This module represents the buffer pools for the output reads of each network group. Used in the async API.
+ **/
+
+#ifndef _HAILO_CNG_BUFFER_POOL_HPP_
+#define _HAILO_CNG_BUFFER_POOL_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/hailort_common.hpp"
+#include "hailo/buffer.hpp"
+#include "hailo/vdevice.hpp"
+#include "hailo/dma_mapped_buffer.hpp"
+#include "utils/thread_safe_queue.hpp"
+
+namespace hailort
+{
+
+class ServiceStreamBufferPool
+{
+public:
+    static Expected<std::shared_ptr<ServiceStreamBufferPool>> create(uint32_t vdevice_handle, size_t buffer_size,
+        size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event);
+
+    struct AllocatedMappedBuffer {
+        BufferPtr buffer;
+        DmaMappedBuffer mapped_buffer;
+    };
+
+    ServiceStreamBufferPool(size_t buffer_size, std::vector<AllocatedMappedBuffer> &&buffers,
+        SpscQueue<BufferPtr> &&free_buffers_queue, size_t buffers_count);
+    virtual ~ServiceStreamBufferPool() = default;
+
+    Expected<BufferPtr> acquire_buffer();
+    hailo_status return_to_pool(BufferPtr buffer);
+    size_t buffers_count();
+
+private:
+
+    size_t m_buffer_size;
+    size_t m_buffers_count;
+    std::vector<AllocatedMappedBuffer> m_buffers;
+    SpscQueue<BufferPtr> m_free_buffers_queue;
+    std::mutex m_mutex;
+};
+
+using BufferPoolPtr = std::shared_ptr<ServiceStreamBufferPool>;
+using output_name_t = std::string;
+
+// This object holds a buffer pool for each output stream of the network group.
+// It is used to pre-allocate all the buffers necessary for the reads from the device.
+// The buffers are reusable, which also prevents allocation during inference.
+// The buffers are mapped to the device during their creation, which prevents lazy mapping on each frame's inference.
+// Currently only used in the async API.
+class ServiceNetworkGroupBufferPool
+{
+public:
+    static Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> create(uint32_t vdevice_handle);
+
+    hailo_status allocate_pool(const std::string &name, size_t frame_size, size_t pool_size);
+    // Used in order to reallocate the pool buffers with a different frame_size
+    hailo_status reallocate_pool(const std::string &name, size_t frame_size);
+
+    ServiceNetworkGroupBufferPool(ServiceNetworkGroupBufferPool &&) = delete;
+    ServiceNetworkGroupBufferPool(const ServiceNetworkGroupBufferPool &) = delete;
+    ServiceNetworkGroupBufferPool &operator=(ServiceNetworkGroupBufferPool &&) = delete;
+    ServiceNetworkGroupBufferPool &operator=(const ServiceNetworkGroupBufferPool &) = delete;
+    virtual ~ServiceNetworkGroupBufferPool() = default;
+
+    ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle);
+    Expected<BufferPtr> acquire_buffer(const std::string &output_name);
+    hailo_status return_to_pool(const std::string &output_name, BufferPtr buffer);
+    hailo_status shutdown();
+
+private:
+    std::unordered_map<output_name_t, BufferPoolPtr> m_output_name_to_buffer_pool;
+    EventPtr m_shutdown_event;
+    uint32_t m_vdevice_handle;
+    std::mutex m_mutex;
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_CNG_BUFFER_POOL_HPP_ */
diff --git a/hailort/hailort_service/hailort_rpc_service.cpp b/hailort/hailort_service/hailort_rpc_service.cpp
index 482c93a7..da67ac08 100644
--- a/hailort/hailort_service/hailort_rpc_service.cpp
+++ b/hailort/hailort_service/hailort_rpc_service.cpp
@@ -16,20 +16,23 @@
 #include "common/os_utils.hpp"
 
 #include "hailort_rpc_service.hpp"
+#include "cng_buffer_pool.hpp"
 #include "rpc/rpc_definitions.hpp"
 #include "service_resource_manager.hpp"
-#include "net_flow/ops/op_metadata.hpp"
-#include "net_flow/ops/nms_post_process.hpp"
-#include "net_flow/ops/yolov8_post_process.hpp"
-#include "net_flow/ops/ssd_post_process.hpp"
-#include "net_flow/ops/yolox_post_process.hpp"
-#include "net_flow/ops/yolov5_op_metadata.hpp"
-#include "net_flow/ops/yolov5_seg_op_metadata.hpp"
+#include "net_flow/ops_metadata/op_metadata.hpp"
"net_flow/ops_metadata/op_metadata.hpp" +#include "net_flow/ops_metadata/nms_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov8_op_metadata.hpp" +#include "net_flow/ops_metadata/ssd_op_metadata.hpp" +#include "net_flow/ops_metadata/yolox_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp" #include "hef/layer_info.hpp" #include + +#define MAX_GRPC_BUFFER_SIZE (2ULL * 1024 * 1024 * 1024) // 2GB namespace hailort { @@ -97,7 +100,7 @@ void HailoRtRpcService::remove_disconnected_clients() auto now = std::chrono::high_resolution_clock::now(); std::set pids_to_remove; { - std::unique_lock lock(m_mutex); + std::unique_lock lock(m_keep_alive_mutex); for (auto pid_to_last_alive : m_clients_pids) { auto duration = std::chrono::duration_cast(now - pid_to_last_alive.second); if (duration > hailort::HAILO_KEEPALIVE_INTERVAL) { @@ -133,7 +136,7 @@ void HailoRtRpcService::keep_alive() void HailoRtRpcService::update_client_id_timestamp(uint32_t pid) { - std::unique_lock lock(m_mutex); + std::unique_lock lock(m_keep_alive_mutex); m_clients_pids[pid] = std::chrono::high_resolution_clock::now(); } @@ -186,7 +189,7 @@ grpc::Status HailoRtRpcService::VDevice_create(grpc::ServerContext *, const VDev CHECK_EXPECTED_AS_RPC_STATUS(vdevice, reply); update_client_id_timestamp(request->pid()); - std::unique_lock lock(m_vdevice_creation_mutex); + std::unique_lock lock(m_vdevice_mutex); auto &vdevice_manager = ServiceResourceManager::get_instance(); auto vdevice_handle = vdevice_manager.register_resource(request->pid(), std::move(vdevice.release())); @@ -271,6 +274,7 @@ grpc::Status HailoRtRpcService::VDevice_configure(grpc::ServerContext*, const VD } update_client_id_timestamp(request->pid()); + std::unique_lock lock(m_vdevice_mutex); auto lambda = [](std::shared_ptr vdevice, Hef &hef, NetworkGroupsParamsMap &configure_params_map) { return vdevice->configure(hef, configure_params_map); }; @@ -281,14 +285,57 @@ grpc::Status HailoRtRpcService::VDevice_configure(grpc::ServerContext*, const VD auto &networks_manager = ServiceResourceManager::get_instance(); for (auto network : networks.value()) { - auto handle = networks_manager.register_resource(request->pid(), network); - reply->add_networks_handles(handle); + auto ng_handle = networks_manager.register_resource(request->pid(), network); + reply->add_networks_handles(ng_handle); + + bool allocate_for_raw_streams = false; + // The network_group's buffer pool is used for the read's buffers, + // On async flow - we allocate for raw-streams. 
+hailo_status HailoRtRpcService::create_buffer_pools_for_ng(uint32_t vdevice_handle, uint32_t ng_handle, uint32_t request_pid,
+    bool allocate_for_raw_streams)
+{
+    auto cng_buffer_pool = ServiceNetworkGroupBufferPool::create(vdevice_handle);
+    CHECK_EXPECTED_AS_STATUS(cng_buffer_pool);
+
+    auto &cng_buffer_pool_manager = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance();
+    auto cng_buffer_pool_handle = cng_buffer_pool_manager.register_resource(request_pid, cng_buffer_pool.release());
+    CHECK(cng_buffer_pool_handle == ng_handle, HAILO_INTERNAL_FAILURE,
+        "cng_buffer_pool_handle = {} must be equal to network_group_handle = {}", cng_buffer_pool_handle, ng_handle);
+
+    if (allocate_for_raw_streams) {
+        // For the async API - the buffer size in the pool will be the stream's hw frame size, as used in the infer_model pipeline
+        auto min_buffer_pool_size = get_min_buffer_pool_size(ng_handle);
+        CHECK_EXPECTED_AS_STATUS(min_buffer_pool_size);
+
+        auto streams_infos = get_all_stream_infos(ng_handle);
+        CHECK_EXPECTED_AS_STATUS(streams_infos);
+
+        for (const auto &stream_info : streams_infos.value()) {
+            if (stream_info.direction == HAILO_D2H_STREAM) {
+                auto allocate_lambda = [&](std::shared_ptr<ServiceNetworkGroupBufferPool> cng_buffer_pool) {
+                    return cng_buffer_pool->allocate_pool(stream_info.name, stream_info.hw_frame_size, min_buffer_pool_size.value());
+                };
+                CHECK_SUCCESS(cng_buffer_pool_manager.execute<hailo_status>(ng_handle, allocate_lambda));
+            }
+        }
+    }
+
+    return HAILO_SUCCESS;
+}
+
 grpc::Status HailoRtRpcService::VDevice_get_physical_devices_ids(grpc::ServerContext*,
     const VDevice_get_physical_devices_ids_Request* request, VDevice_get_physical_devices_ids_Reply* reply)
 {
@@ -370,96 +417,192 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_dup_handle(grpc::ServerCo
     return grpc::Status::OK;
 }
 
+ProtoCallbackIdentifier serialize_callback_identifier(uint32_t vdevice_handle, uint32_t ng_handle,
+    callback_type_t cb_type, const std::string &stream_name, uint32_t cb_idx, hailo_status status, BufferPtr buffer = nullptr)
+{
+    ProtoCallbackIdentifier cb_identifier;
+    cb_identifier.set_vdevice_handle(vdevice_handle);
+    cb_identifier.set_network_group_handle(ng_handle);
+    cb_identifier.set_cb_type(cb_type);
+    cb_identifier.set_stream_name(stream_name);
+    cb_identifier.set_cb_idx(cb_idx);
+    cb_identifier.set_status(status);
+    if (buffer != nullptr) {
+        cb_identifier.set_data(buffer->data(), buffer->size());
+    }
+
+    return cb_identifier;
+}
+
+ buffer_pool_manager.release_resource(request->network_group_identifier().network_group_handle(), request->pid());
+
 auto &manager = ServiceResourceManager::get_instance();
 manager.release_resource(request->network_group_identifier().network_group_handle(), request->pid());
 reply->set_status(static_cast(HAILO_SUCCESS));
 return grpc::Status::OK;
 }
 
-grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_infer_async(grpc::ServerContext*,
- const ConfiguredNetworkGroup_infer_async_Request *request, ConfiguredNetworkGroup_infer_async_Reply *reply)
+hailo_status HailoRtRpcService::add_input_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request,
+ uint32_t vdevice_handle, uint32_t ng_handle, std::shared_ptr infer_async_request,
+ NamedBuffersCallbacks &named_buffers_callbacks)
 {
- auto vdevice_handle = request->identifier().vdevice_handle();
- auto ng_handle = request->identifier().network_group_handle();
- auto infer_request_done_cb_idx = request->infer_request_done_cb_idx();
+ // Prepare input buffer
+ BufferPtr buffer;
+ MemoryView mem_view;
+ auto *data = reinterpret_cast(proto_stream_transfer_request.data().c_str());
+ if (reinterpret_cast(data) % HailoRTCommon::HW_DATA_ALIGNMENT == 0) {
+ // Input buffer is aligned to 8
+ mem_view = MemoryView::create_const(data, proto_stream_transfer_request.data().size());
+ } else {
+ // The memory is not aligned to 8, therefore we need to copy the data into a buffer
+ auto buffer_exp = Buffer::create_shared(data, proto_stream_transfer_request.data().size(),
+ BufferStorageParams::create_dma());
+ CHECK_EXPECTED(buffer_exp);
+ buffer = buffer_exp.release();
+ mem_view = MemoryView(*buffer);
+ }
+
+ // Prepare callback
+ auto &stream_name = proto_stream_transfer_request.stream_name();
+ auto cb_idx = proto_stream_transfer_request.cb_idx();
+ std::function transfer_done = [this, vdevice_handle, ng_handle, cb_idx, stream_name, buffer, infer_async_request]
+ (hailo_status status)
+ {
+ // We pass the request (which is shared_ptr) to the callback in order to keep the input's memory alive until inference is done.
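+ // The captures themselves provide the keep-alive; the (void) casts below only mark the
+ // captured objects as intentionally unused inside the callback body.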
+ (void)infer_async_request; + (void)buffer; + + auto cb_identifier = serialize_callback_identifier(vdevice_handle, ng_handle, CALLBACK_TYPE_TRANSFER, + stream_name, cb_idx, status); + enqueue_cb_identifier(vdevice_handle, std::move(cb_identifier)); + }; + + named_buffers_callbacks.emplace(stream_name, std::make_pair(mem_view, transfer_done)); + return HAILO_SUCCESS; +} + +hailo_status HailoRtRpcService::add_output_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request, uint32_t vdevice_handle, + uint32_t ng_handle, NamedBuffersCallbacks &named_buffers_callbacks) +{ + // Prepare output buffer + auto &stream_name = proto_stream_transfer_request.stream_name(); + auto buffer_exp = acquire_buffer_from_cng_pool(ng_handle, stream_name); + CHECK_EXPECTED(buffer_exp); + auto buffer = buffer_exp.release(); + + // Prepare callback + auto cb_idx = proto_stream_transfer_request.cb_idx(); + std::function transfer_done = [this, vdevice_handle, ng_handle, cb_idx, stream_name, buffer] + (hailo_status status) + { + auto cb_identifier = serialize_callback_identifier(vdevice_handle, ng_handle, CALLBACK_TYPE_TRANSFER, + stream_name, cb_idx, status, buffer); + return_buffer_to_cng_pool(ng_handle, stream_name, buffer); + enqueue_cb_identifier(vdevice_handle, std::move(cb_identifier)); + }; + + named_buffers_callbacks.emplace(stream_name, std::make_pair(MemoryView(*buffer), transfer_done)); + return HAILO_SUCCESS; +} +Expected HailoRtRpcService::prepare_named_buffers_callbacks(uint32_t vdevice_handle, + uint32_t ng_handle, std::shared_ptr infer_async_request) +{ NamedBuffersCallbacks named_buffers_callbacks; - for (const auto &proto_transfer_request : request->transfer_requests()) { - auto &stream_name = proto_transfer_request.stream_name(); - auto direction = proto_transfer_request.direction(); - auto cb_idx = proto_transfer_request.cb_idx(); - BufferPtr buffer; + for (const auto &proto_stream_transfer_request : infer_async_request->transfer_requests()) { + auto direction = proto_stream_transfer_request.direction(); + auto status = HAILO_SUCCESS; if (direction == HAILO_H2D_STREAM) { - // TODO: Remove memcpy after HRT-12238 - auto buffer_exp = Buffer::create_shared(reinterpret_cast(proto_transfer_request.data().c_str()), - proto_transfer_request.size(), BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_RPC_STATUS(buffer_exp, reply); - buffer = buffer_exp.release(); + status = add_input_named_buffer(proto_stream_transfer_request, vdevice_handle, ng_handle, infer_async_request, named_buffers_callbacks); } else { - // TODO: HRT-12360 - Use buffer pool for the service reads - auto buffer_exp = Buffer::create_shared(proto_transfer_request.size(), BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_RPC_STATUS(buffer_exp, reply); - buffer = buffer_exp.release(); + status = add_output_named_buffer(proto_stream_transfer_request, vdevice_handle, ng_handle, named_buffers_callbacks); } + CHECK_SUCCESS_AS_EXPECTED(status); + } - std::function transfer_done = [vdevice_handle, ng_handle, cb_idx, stream_name, direction, buffer] - (hailo_status status) - { - ProtoCallbackIdentifier cb_identifier; - cb_identifier.set_vdevice_handle(vdevice_handle); - cb_identifier.set_network_group_handle(ng_handle); - cb_identifier.set_cb_type(CALLBACK_TYPE_TRANSFER); - cb_identifier.set_stream_name(stream_name); - cb_identifier.set_cb_idx(cb_idx); - cb_identifier.set_status(status); - - auto lambda = [direction](std::shared_ptr cb_queue, ProtoCallbackIdentifier &cb_identifier, BufferPtr buffer) { - if (direction == 
HAILO_D2H_STREAM) { - cb_identifier.set_data(buffer->data(), buffer->size()); - } - return cb_queue->enqueue(std::move(cb_identifier)); - }; + return named_buffers_callbacks; +} - auto &cb_queue_manager = ServiceResourceManager::get_instance(); - auto exc_status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier), buffer); - if (exc_status != HAILO_SUCCESS) { - LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status); - } - }; - named_buffers_callbacks.emplace(stream_name, std::make_pair(MemoryView(*buffer), transfer_done)); +void HailoRtRpcService::enqueue_cb_identifier(uint32_t vdevice_handle, ProtoCallbackIdentifier &&cb_identifier) +{ + auto lambda = [](std::shared_ptr cb_queue, ProtoCallbackIdentifier &cb_identifier) { + return cb_queue->enqueue(std::move(cb_identifier)); + }; + + auto &cb_queue_manager = ServiceResourceManager::get_instance(); + auto status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier)); + if (status != HAILO_SUCCESS) { + LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status); } +} - auto infer_request_done_cb = [vdevice_handle, ng_handle, infer_request_done_cb_idx](hailo_status status){ - ProtoCallbackIdentifier cb_identifier; - cb_identifier.set_vdevice_handle(vdevice_handle); - cb_identifier.set_network_group_handle(ng_handle); - cb_identifier.set_cb_type(CALLBACK_TYPE_INFER_REQUEST); - cb_identifier.set_cb_idx(infer_request_done_cb_idx); +hailo_status HailoRtRpcService::return_buffer_to_cng_pool(uint32_t ng_handle, const std::string &output_name, BufferPtr buffer) +{ + auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); + auto lambda_return_to_pool = [](std::shared_ptr cng_buffer_pool, + const std::string &stream_name, BufferPtr buffer) { + return cng_buffer_pool->return_to_pool(stream_name, buffer); + }; + auto status = cng_buffer_pool_manager.execute(ng_handle, lambda_return_to_pool, + output_name, buffer); + CHECK_SUCCESS(status); - auto lambda = [](std::shared_ptr cb_queue, ProtoCallbackIdentifier &cb_identifier) { - return cb_queue->enqueue(std::move(cb_identifier)); - }; + return HAILO_SUCCESS; +} - auto &cb_queue_manager = ServiceResourceManager::get_instance(); - auto exc_status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier)); - if (exc_status != HAILO_SUCCESS) { - LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status); - } +Expected HailoRtRpcService::acquire_buffer_from_cng_pool(uint32_t ng_handle, const std::string &output_name) +{ + auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); + auto lambda_acquire_buffer = [](std::shared_ptr cng_buffer_pool, const std::string &output_name) { + return cng_buffer_pool->acquire_buffer(output_name); }; + auto buffer = cng_buffer_pool_manager.execute>(ng_handle, lambda_acquire_buffer, output_name); + CHECK_EXPECTED(buffer); + return buffer.release(); +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_infer_async(grpc::ServerContext*, + const ConfiguredNetworkGroup_infer_async_Request *raw_request, ConfiguredNetworkGroup_infer_async_Reply *reply) +{ + // Moving ownership of the request, so we can use the request's memory as the input buffers instead of allocating new memory for it. 
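+ // Each input's transfer_done callback captures this shared_ptr as well (see add_input_named_buffer
+ // above), so the request's memory stays valid until all transfers have completed.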
+ auto request = make_shared_nothrow(std::move(*raw_request)); + auto vdevice_handle = request->identifier().vdevice_handle(); + auto ng_handle = request->identifier().network_group_handle(); + auto infer_request_done_cb_idx = request->infer_request_done_cb_idx(); + + // Prepare buffers + auto named_buffers_callbacks = prepare_named_buffers_callbacks(vdevice_handle, ng_handle, request); + CHECK_EXPECTED_AS_RPC_STATUS(named_buffers_callbacks, reply); + + // Prepare request finish callback + auto infer_request_done_cb = [this, vdevice_handle, ng_handle, infer_request_done_cb_idx](hailo_status status) { + auto cb_identifier = serialize_callback_identifier(vdevice_handle, ng_handle, CALLBACK_TYPE_INFER_REQUEST, + "", infer_request_done_cb_idx, status); + enqueue_cb_identifier(vdevice_handle, std::move(cb_identifier)); + }; + + // Run infer async auto lambda = [](std::shared_ptr cng, NamedBuffersCallbacks &named_buffers_callbacks, const std::function &infer_request_done_cb) { return cng->infer_async(named_buffers_callbacks, infer_request_done_cb); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().network_group_handle(), lambda, named_buffers_callbacks, infer_request_done_cb); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + auto status = manager.execute(request->identifier().network_group_handle(), lambda, named_buffers_callbacks.release(), infer_request_done_cb); + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("User aborted inference"); - reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + reply->set_status(static_cast(HAILO_STREAM_ABORT)); return grpc::Status::OK; } CHECK_SUCCESS_AS_RPC_STATUS(status, reply); @@ -629,7 +772,7 @@ void serialize_vstream_info(const hailo_vstream_info_t &info, ProtoVStreamInfo * auto nms_shape_proto = info_proto->mutable_nms_shape(); nms_shape_proto->set_number_of_classes(info.nms_shape.number_of_classes); nms_shape_proto->set_max_bbox_per_class(info.nms_shape.max_bboxes_per_class); - nms_shape_proto->set_max_mask_size(info.nms_shape.max_mask_size); + nms_shape_proto->set_max_accumulated_mask_size(info.nms_shape.max_accumulated_mask_size); } else { auto shape_proto = info_proto->mutable_shape(); shape_proto->set_height(info.shape.height); @@ -918,6 +1061,7 @@ void serialize_yolov5seg_op_metadata(hailort::net_flow::OpMetadata &op_metadata, yolov5seg_config_proto->set_mask_threshold(yolov5seg_config.mask_threshold); yolov5seg_config_proto->set_layer_name(yolov5seg_config.proto_layer_name); + yolov5seg_config_proto->set_max_accumulated_mask_size(yolov5seg_config.max_accumulated_mask_size); } void serialize_op_matadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMetadata *op_metadata_proto) @@ -1026,12 +1170,7 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_vstream_infos(grp const ConfiguredNetworkGroup_get_vstream_infos_Request *request, ConfiguredNetworkGroup_get_vstream_infos_Reply *reply) { - auto lambda = [](std::shared_ptr cng, std::string network_name) { - return cng->get_all_vstream_infos(network_name); - }; - auto &net_group_manager = ServiceResourceManager::get_instance(); - auto expected_vstream_infos = net_group_manager.execute>>( - request->identifier().network_group_handle(), lambda, request->network_name()); + auto expected_vstream_infos = get_all_vstream_infos(request->identifier().network_group_handle()); CHECK_EXPECTED_AS_RPC_STATUS(expected_vstream_infos, reply); serialize_vstream_infos(reply, expected_vstream_infos.value()); @@ -1170,7 +1309,6 
@@ grpc::Status HailoRtRpcService::InputVStreams_create(grpc::ServerContext *, cons auto &net_group_manager = ServiceResourceManager::get_instance(); net_group_manager.dup_handle(network_group_handle, client_pid); - auto lambda = [](std::shared_ptr cng, const std::map &inputs_params) { return cng->create_input_vstreams(inputs_params); }; @@ -1178,13 +1316,12 @@ grpc::Status HailoRtRpcService::InputVStreams_create(grpc::ServerContext *, cons CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply); auto vstreams = vstreams_expected.release(); - auto &manager = ServiceResourceManager::get_instance(); + auto &vstreams_manager = ServiceResourceManager::get_instance(); for (size_t i = 0; i < vstreams.size(); i++) { - auto handle = manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); + auto handle = vstreams_manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); reply->add_handles(handle); } - reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1237,13 +1374,19 @@ grpc::Status HailoRtRpcService::OutputVStreams_create(grpc::ServerContext *, con CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply); auto vstreams = vstreams_expected.release(); - auto &manager = ServiceResourceManager::get_instance(); + // The network_group's buffer pool is used for the read's buffers. + auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); + auto &vstream_manager = ServiceResourceManager::get_instance(); for (size_t i = 0; i < vstreams.size(); i++) { - auto handle = manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); + auto allocate_lambda = [&](std::shared_ptr cng_buffer_pool) { + return cng_buffer_pool->allocate_pool(vstreams[i].name(), vstreams[i].get_frame_size(), output_params.at(vstreams[i].name()).queue_size); + }; + CHECK_SUCCESS_AS_RPC_STATUS(cng_buffer_pool_manager.execute(network_group_handle, allocate_lambda), reply); + + auto handle = vstream_manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); reply->add_handles(handle); } - reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1292,16 +1435,17 @@ grpc::Status HailoRtRpcService::InputVStream_is_multi_planar(grpc::ServerContext grpc::Status HailoRtRpcService::InputVStream_write(grpc::ServerContext*, const InputVStream_write_Request *request, InputVStream_write_Reply *reply) { - std::vector data(request->data().begin(), request->data().end()); + MemoryView mem_view = MemoryView::create_const(reinterpret_cast(request->data().c_str()), + request->data().size()); auto lambda = [](std::shared_ptr input_vstream, const MemoryView &buffer) { return input_vstream->write(std::move(buffer)); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView::create_const(data.data(), data.size())); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, mem_view); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("User aborted VStream write."); - reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + reply->set_status(static_cast(HAILO_STREAM_ABORT)); return grpc::Status::OK; } CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "VStream write failed"); @@ -1315,6 +1459,7 @@ grpc::Status HailoRtRpcService::InputVStream_write_pix(grpc::ServerContext*, con hailo_pix_buffer_t pix_buffer = {}; pix_buffer.index = 
request->index(); pix_buffer.number_of_planes = request->number_of_planes(); + pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; // Service does not support other memory types std::vector> data_arrays; data_arrays.reserve(pix_buffer.number_of_planes); for (uint32_t i =0; i < pix_buffer.number_of_planes; i++) { @@ -1329,9 +1474,9 @@ grpc::Status HailoRtRpcService::InputVStream_write_pix(grpc::ServerContext*, con auto &manager = ServiceResourceManager::get_instance(); auto status = manager.execute(request->identifier().vstream_handle(), lambda, pix_buffer); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("User aborted VStream write."); - reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + reply->set_status(static_cast(HAILO_STREAM_ABORT)); return grpc::Status::OK; } CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "VStream write failed"); @@ -1362,34 +1507,71 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_network_infos(grpc::S grpc::Status HailoRtRpcService::OutputVStream_read(grpc::ServerContext*, const OutputVStream_read_Request *request, OutputVStream_read_Reply *reply) { - std::vector data(request->size()); + auto ng_handle = request->identifier().network_group_handle(); + auto vstream_name = output_vstream_name(request->identifier().vstream_handle()); + CHECK_EXPECTED_AS_RPC_STATUS(vstream_name, reply); + + auto buffer_exp = acquire_buffer_from_cng_pool(ng_handle, vstream_name.value()); + CHECK_EXPECTED_AS_RPC_STATUS(buffer_exp, reply); + auto buffer = buffer_exp.release(); + auto lambda = [](std::shared_ptr output_vstream, MemoryView &buffer) { return output_vstream->read(std::move(buffer)); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView(data.data(), data.size())); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView(buffer->data(), buffer->size())); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("User aborted VStream read."); - reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + reply->set_status(static_cast(HAILO_STREAM_ABORT)); return grpc::Status::OK; } CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "VStream read failed"); - reply->set_data(data.data(), data.size()); + + if (buffer->size() > MAX_GRPC_BUFFER_SIZE) { + LOGGER__ERROR("Response buffer size is too big: {}. 
Max response size is: {}", buffer->size(), MAX_GRPC_BUFFER_SIZE); + reply->set_status(static_cast(HAILO_RPC_FAILED)); + return grpc::Status::OK; + } + + reply->set_data(buffer->data(), buffer->size()); + + status = return_buffer_to_cng_pool(ng_handle, vstream_name.value(), buffer); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } -grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_stream_infos(grpc::ServerContext*, - const ConfiguredNetworkGroup_get_all_stream_infos_Request *request, - ConfiguredNetworkGroup_get_all_stream_infos_Reply *reply) +Expected> HailoRtRpcService::get_all_stream_infos(uint32_t ng_handle) { auto lambda = [](std::shared_ptr cng) { return cng->get_all_stream_infos(); }; auto &manager = ServiceResourceManager::get_instance(); - auto expected_stream_infos = manager.execute>>(request->identifier().network_group_handle(), - lambda); + auto expected_stream_infos = manager.execute>>(ng_handle, lambda); + CHECK_EXPECTED(expected_stream_infos); + + return expected_stream_infos.release(); +} + +Expected> HailoRtRpcService::get_all_vstream_infos(uint32_t ng_handle) +{ + auto lambda = [](std::shared_ptr cng) { + return cng->get_all_vstream_infos(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto expected_vstream_infos = manager.execute>>(ng_handle, lambda); + CHECK_EXPECTED(expected_vstream_infos); + + return expected_vstream_infos.release(); +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_stream_infos(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_all_stream_infos_Request *request, + ConfiguredNetworkGroup_get_all_stream_infos_Reply *reply) +{ + auto expected_stream_infos = get_all_stream_infos(request->identifier().network_group_handle()); CHECK_EXPECTED_AS_RPC_STATUS(expected_stream_infos, reply); auto proto_stream_infos = reply->mutable_stream_infos(); @@ -1495,16 +1677,23 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_sorted_output_names(g return grpc::Status::OK; } -grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*, - const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request, - ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) +Expected HailoRtRpcService::get_min_buffer_pool_size(uint32_t ng_handle) { auto lambda = [](std::shared_ptr cng) { return cng->get_min_buffer_pool_size(); }; auto &manager = ServiceResourceManager::get_instance(); - auto min_buffer_pool_size_expected = manager.execute>(request->identifier().network_group_handle(), - lambda); + auto min_buffer_pool_size = manager.execute>(ng_handle, lambda); + CHECK_EXPECTED(min_buffer_pool_size); + + return min_buffer_pool_size.release(); +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request, + ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) +{ + auto min_buffer_pool_size_expected = get_min_buffer_pool_size(request->identifier().network_group_handle()); CHECK_EXPECTED_AS_RPC_STATUS(min_buffer_pool_size_expected, reply); reply->set_min_buffer_pool_size(static_cast(min_buffer_pool_size_expected.release())); @@ -1598,6 +1787,22 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_bboxes_per_cl return grpc::Status::OK; } +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(grpc::ServerContext*, + const 
ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request *request, + ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng, const std::string &edge_name, uint32_t max_accumulated_mask_size) { + return cng->set_nms_max_accumulated_mask_size(edge_name, max_accumulated_mask_size); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->identifier().network_group_handle(), lambda, + request->edge_name(), request->max_accumulated_mask_size()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) @@ -1656,11 +1861,7 @@ grpc::Status HailoRtRpcService::InputVStream_get_frame_size(grpc::ServerContext* grpc::Status HailoRtRpcService::OutputVStream_get_frame_size(grpc::ServerContext*, const VStream_get_frame_size_Request *request, VStream_get_frame_size_Reply *reply) { - auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->get_frame_size(); - }; - auto &manager = ServiceResourceManager::get_instance(); - auto frame_size = manager.execute>(request->identifier().vstream_handle(), lambda); + auto frame_size = output_vstream_frame_size(request->identifier().vstream_handle()); CHECK_EXPECTED_AS_RPC_STATUS(frame_size, reply); reply->set_frame_size(static_cast(frame_size.release())); @@ -1692,14 +1893,34 @@ grpc::Status HailoRtRpcService::InputVStream_name(grpc::ServerContext*, const VS return grpc::Status::OK; } -grpc::Status HailoRtRpcService::OutputVStream_name(grpc::ServerContext*, const VStream_name_Request *request, - VStream_name_Reply *reply) +Expected HailoRtRpcService::output_vstream_name(uint32_t vstream_handle) { auto lambda = [](std::shared_ptr output_vstream) { return output_vstream->name(); }; auto &manager = ServiceResourceManager::get_instance(); - auto name = manager.execute>(request->identifier().vstream_handle(), lambda); + auto name = manager.execute>(vstream_handle, lambda); + CHECK_EXPECTED(name); + + return name.release(); +} + +Expected HailoRtRpcService::output_vstream_frame_size(uint32_t vstream_handle) +{ + auto lambda = [](std::shared_ptr output_vstream) { + return output_vstream->get_frame_size(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto frame_size = manager.execute>(vstream_handle, lambda); + CHECK_EXPECTED(frame_size); + + return frame_size.release(); +} + +grpc::Status HailoRtRpcService::OutputVStream_name(grpc::ServerContext*, const VStream_name_Request *request, + VStream_name_Reply *reply) +{ + auto name = output_vstream_name(request->identifier().vstream_handle()); CHECK_EXPECTED_AS_RPC_STATUS(name, reply); reply->set_name(name.release()); @@ -1971,6 +2192,23 @@ grpc::Status HailoRtRpcService::OutputVStream_set_nms_iou_threshold(grpc::Server return grpc::Status::OK; } +hailo_status HailoRtRpcService::update_buffer_size_in_pool(uint32_t vstream_handle, uint32_t network_group_handle) +{ + auto vstream_name = output_vstream_name(vstream_handle); + CHECK_EXPECTED(vstream_name); + + auto frame_size = output_vstream_frame_size(vstream_handle); + CHECK_EXPECTED(frame_size); + + auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); + auto allocate_lambda = 
[&](std::shared_ptr cng_buffer_pool) { + return cng_buffer_pool->reallocate_pool(vstream_name.release(), frame_size.release()); + }; + CHECK_SUCCESS(cng_buffer_pool_manager.execute(network_group_handle, allocate_lambda)); + + return HAILO_SUCCESS; +} + grpc::Status HailoRtRpcService::OutputVStream_set_nms_max_proposals_per_class(grpc::ServerContext*, const VStream_set_nms_max_proposals_per_class_Request *request, VStream_set_nms_max_proposals_per_class_Reply *reply) { @@ -1981,6 +2219,26 @@ grpc::Status HailoRtRpcService::OutputVStream_set_nms_max_proposals_per_class(gr auto status = manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->max_proposals_per_class())); CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_max_proposals_per_class failed"); + status = update_buffer_size_in_pool(request->identifier().vstream_handle(), request->identifier().network_group_handle()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "Updating buffer size in pool failed"); + + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::OutputVStream_set_nms_max_accumulated_mask_size(grpc::ServerContext*, + const VStream_set_nms_max_accumulated_mask_size_Request *request, VStream_set_nms_max_accumulated_mask_size_Reply *reply) +{ + auto lambda = [](std::shared_ptr output_vstream, uint32_t max_accumulated_mask_size) { + return output_vstream->set_nms_max_accumulated_mask_size(max_accumulated_mask_size); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->max_accumulated_mask_size())); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_max_accumulated_mask_size failed"); + + status = update_buffer_size_in_pool(request->identifier().vstream_handle(), request->identifier().network_group_handle()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "Updating buffer size in pool failed"); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } diff --git a/hailort/hailort_service/hailort_rpc_service.hpp b/hailort/hailort_service/hailort_rpc_service.hpp index 0531e533..5e022cc3 100644 --- a/hailort/hailort_service/hailort_rpc_service.hpp +++ b/hailort/hailort_service/hailort_rpc_service.hpp @@ -123,6 +123,8 @@ class HailoRtRpcService final : public ProtoHailoRtRpc::Service { const VStream_set_nms_iou_threshold_Request *request, VStream_set_nms_iou_threshold_Reply*) override; virtual grpc::Status OutputVStream_set_nms_max_proposals_per_class(grpc::ServerContext *ctx, const VStream_set_nms_max_proposals_per_class_Request *request, VStream_set_nms_max_proposals_per_class_Reply*) override; + virtual grpc::Status OutputVStream_set_nms_max_accumulated_mask_size(grpc::ServerContext *ctx, + const VStream_set_nms_max_accumulated_mask_size_Request *request, VStream_set_nms_max_accumulated_mask_size_Reply*) override; virtual grpc::Status ConfiguredNetworkGroup_dup_handle(grpc::ServerContext *ctx, const ConfiguredNetworkGroup_dup_handle_Request *request, ConfiguredNetworkGroup_dup_handle_Reply*) override; @@ -206,6 +208,9 @@ class HailoRtRpcService final : public ProtoHailoRtRpc::Service { virtual grpc::Status ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(grpc::ServerContext*, const ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request *request, ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply *reply) override; + virtual grpc::Status 
ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(grpc::ServerContext*, + const ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request *request, + ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply *reply) override; virtual grpc::Status ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) override; @@ -224,12 +229,30 @@ class HailoRtRpcService final : public ProtoHailoRtRpc::Service { void abort_vstreams_by_pids(std::set &pids); void remove_disconnected_clients(); void update_client_id_timestamp(uint32_t pid); + Expected get_min_buffer_pool_size(uint32_t ng_handle); + Expected> get_all_stream_infos(uint32_t ng_handle); + Expected> get_all_vstream_infos(uint32_t ng_handle); + Expected output_vstream_name(uint32_t vstream_handle); + hailo_status create_buffer_pools_for_ng(uint32_t vdevice_handle, uint32_t ng_handle, uint32_t request_pid, + bool allocate_for_raw_streams); + Expected prepare_named_buffers_callbacks(uint32_t vdevice_handle, + uint32_t ng_handle, std::shared_ptr infer_async_request); + hailo_status add_input_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request, uint32_t vdevice_handle, + uint32_t ng_handle, std::shared_ptr infer_async_request, + NamedBuffersCallbacks &named_buffers_callbacks); + hailo_status add_output_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request, uint32_t vdevice_handle, + uint32_t ng_handle, NamedBuffersCallbacks &named_buffers_callbacks); + void enqueue_cb_identifier(uint32_t vdevice_handle, ProtoCallbackIdentifier &&cb_identifier); + hailo_status return_buffer_to_cng_pool(uint32_t ng_handle, const std::string &output_name, BufferPtr buffer); + Expected acquire_buffer_from_cng_pool(uint32_t ng_handle, const std::string &output_name); + Expected output_vstream_frame_size(uint32_t vstream_handle); + hailo_status update_buffer_size_in_pool(uint32_t vstream_handle, uint32_t network_group_handle); - std::mutex m_mutex; + std::mutex m_keep_alive_mutex; std::map> m_clients_pids; std::unique_ptr m_keep_alive; - std::mutex m_vdevice_creation_mutex; + std::mutex m_vdevice_mutex; }; } diff --git a/hailort/hailortcli/CMakeLists.txt b/hailort/hailortcli/CMakeLists.txt index 4f05b5da..8b180dad 100644 --- a/hailort/hailortcli/CMakeLists.txt +++ b/hailort/hailortcli/CMakeLists.txt @@ -72,6 +72,7 @@ target_link_libraries(hailortcli nlohmann_json spdlog::spdlog readerwriterqueue + eigen DotWriter scheduler_mon_proto profiler_proto) diff --git a/hailort/hailortcli/benchmark_command.cpp b/hailort/hailortcli/benchmark_command.cpp index 7c3d1c18..57fa385f 100644 --- a/hailort/hailortcli/benchmark_command.cpp +++ b/hailort/hailortcli/benchmark_command.cpp @@ -98,10 +98,10 @@ hailo_status BenchmarkCommand::execute() std::cout << "FPS (hw_only) = " << hw_only_res.fps().value() <overall_latency()) { - std::cout << " (overall) = " << InferResultsFormatUtils::latency_result_to_ms(overall_latency.value()) << " ms" << std::endl; + std::cout << " (overall) = " << InferStatsPrinter::latency_result_to_ms(overall_latency.value()) << " ms" << std::endl; } } if (!m_not_measure_power) { diff --git a/hailort/hailortcli/download_action_list_command.cpp b/hailort/hailortcli/download_action_list_command.cpp index fdd978e7..7d57108f 100644 --- a/hailort/hailortcli/download_action_list_command.cpp +++ 
b/hailort/hailortcli/download_action_list_command.cpp @@ -411,7 +411,7 @@ Expected DownloadActionListCommand::parse_single_action(uint32_t b } Expected DownloadActionListCommand::parse_context(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, const std::string &context_name) + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, const std::string &context_name) { uint8_t converted_context_type = static_cast(context_type); uint32_t action_list_base_address = 0; @@ -521,7 +521,7 @@ Expected DownloadActionListCommand::parse_network_group(Device &de network_group_json["contexts"].emplace_back(preliminary_context_json.release()); const auto dynamic_contexts_count = number_of_dynamic_contexts_per_network_group.value()[network_group_id]; - for (uint8_t context_index = 0; context_index < dynamic_contexts_count; context_index++) { + for (uint16_t context_index = 0; context_index < dynamic_contexts_count; context_index++) { auto context_json = parse_context(device, network_group_id, CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC, context_index, fmt::format("dynamic_{}", context_index)); diff --git a/hailort/hailortcli/download_action_list_command.hpp b/hailort/hailortcli/download_action_list_command.hpp index ede0ac65..0cbb8851 100644 --- a/hailort/hailortcli/download_action_list_command.hpp +++ b/hailort/hailortcli/download_action_list_command.hpp @@ -61,7 +61,7 @@ class DownloadActionListCommand : public DeviceCommand uint32_t current_buffer_offset, uint32_t *action_length, bool *is_repeated, uint8_t *num_repeated, CONTEXT_SWITCH_DEFS__ACTION_TYPE_t *sub_action_type, uint32_t *time_stamp); static Expected parse_context(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, const std::string &context_name); static double get_accumulator_mean_value(const AccumulatorPtr &accumulator, double default_value = INVALID_NUMERIC_VALUE); static Expected parse_network_groups(Device &device, const ConfiguredNetworkGroupVector &network_groups); diff --git a/hailort/hailortcli/graph_printer.cpp b/hailort/hailortcli/graph_printer.cpp index f58fa9ba..2975886a 100644 --- a/hailort/hailortcli/graph_printer.cpp +++ b/hailort/hailortcli/graph_printer.cpp @@ -119,16 +119,16 @@ DotWriter::HtmlString PipelineGraphNode::format_runtime_stats(const std::vector< // We split the statistics into two lines std::stringstream string_stream; string_stream << "" << accumulator->get_data_type() << ": "; - string_stream << "mean=" << InferResultsFormatUtils::format_statistic(accumulator_result.mean()) << ", "; - string_stream << "min=" << InferResultsFormatUtils::format_statistic(accumulator_result.min()) << ", "; - string_stream << "max=" << InferResultsFormatUtils::format_statistic(accumulator_result.max()) << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.mean(), "mean") << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.min(), "min") << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.max(), "max") << ", "; lines.emplace_back(string_stream.str()); // Clear the stream and format the next line string_stream.str(""); - string_stream << "var=" << InferResultsFormatUtils::format_statistic(accumulator_result.var()) << ", "; - string_stream << 
"sd=" << InferResultsFormatUtils::format_statistic(accumulator_result.sd()) << ", "; - string_stream << "mean_sd=" << InferResultsFormatUtils::format_statistic(accumulator_result.mean_sd()); + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.var(), "var") << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.sd(), "sd") << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.mean_sd(), "mean_sd"); lines.emplace_back(string_stream.str()); } diff --git a/hailort/hailortcli/infer_stats_printer.cpp b/hailort/hailortcli/infer_stats_printer.cpp index 2e062578..546c8cf4 100644 --- a/hailort/hailortcli/infer_stats_printer.cpp +++ b/hailort/hailortcli/infer_stats_printer.cpp @@ -30,27 +30,7 @@ static std::string infer_mode_to_string(InferMode infer_mode) } } -std::string InferResultsFormatUtils::format_statistic(const Expected &statistic, uint32_t precision) -{ - if (!statistic.has_value()) { - return "-"; - } - - std::stringstream string_stream; - string_stream << std::fixed << std::setprecision(precision) << statistic.value(); - return string_stream.str(); -} - -std::string InferResultsFormatUtils::format_statistic(const Expected &statistic) -{ - if (!statistic.has_value()) { - return "-"; - } - - return std::to_string(statistic.value()); -} - -double InferResultsFormatUtils::latency_result_to_ms(std::chrono::nanoseconds latency) +double InferStatsPrinter::latency_result_to_ms(std::chrono::nanoseconds latency) { return std::chrono::duration_cast>(latency).count(); } @@ -172,12 +152,12 @@ void InferStatsPrinter::print_csv(const std::vector &network_groups m_results_csv_file << ","; if (auto hw_latency = results.hw_latency()) { - m_results_csv_file << InferResultsFormatUtils::latency_result_to_ms(hw_latency.value()); + m_results_csv_file << InferStatsPrinter::latency_result_to_ms(hw_latency.value()); } m_results_csv_file << ","; if (auto overall_latency = results.overall_latency()) { - m_results_csv_file << InferResultsFormatUtils::latency_result_to_ms(overall_latency.value()); + m_results_csv_file << InferStatsPrinter::latency_result_to_ms(overall_latency.value()); } // TODO HRT-5363 support multiple devices (Currently assumes 1 device in the map) @@ -327,12 +307,12 @@ void InferStatsPrinter::print_benchmark_csv(InferResult &hw_inference_result, m_results_csv_file << ","; if (auto hw_latency = latency_res->hw_latency()) { - m_results_csv_file << InferResultsFormatUtils::latency_result_to_ms(hw_latency.value()); + m_results_csv_file << InferStatsPrinter::latency_result_to_ms(hw_latency.value()); } m_results_csv_file << ","; if (auto overall_latency = latency_res->overall_latency()) { - m_results_csv_file << InferResultsFormatUtils::latency_result_to_ms(overall_latency.value()); + m_results_csv_file << InferStatsPrinter::latency_result_to_ms(overall_latency.value()); } // TODO HRT-5363 support multiple devices (Currently assumes 1 device in the map) @@ -378,11 +358,11 @@ void InferStatsPrinter::print_stdout_single_element(const T &results, size_t fra } if (auto hw_latency = results.hw_latency()) { - std::cout << " HW Latency: " << InferResultsFormatUtils::latency_result_to_ms(hw_latency.value()) << " ms" << std::endl; + std::cout << " HW Latency: " << InferStatsPrinter::latency_result_to_ms(hw_latency.value()) << " ms" << std::endl; } if (auto overall_latency = results.overall_latency()) { - std::cout << " Overall Latency: " << InferResultsFormatUtils::latency_result_to_ms(overall_latency.value()) << 
" ms" << std::endl; + std::cout << " Overall Latency: " << InferStatsPrinter::latency_result_to_ms(overall_latency.value()) << " ms" << std::endl; } } @@ -489,12 +469,12 @@ void InferStatsPrinter::write_accumulator_results(std::ofstream &output_stream, output_stream << vstream_name << ","; output_stream << accumulator->get_data_type() << ","; output_stream << elem_name << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.mean()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.min()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.max()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.var()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.sd()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.mean_sd()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.mean()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.min()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.max()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.var()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.sd()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.mean_sd()) << ","; if (NO_INDEX != index) { output_stream << index; } diff --git a/hailort/hailortcli/infer_stats_printer.hpp b/hailort/hailortcli/infer_stats_printer.hpp index ac0e9503..0d28c4eb 100644 --- a/hailort/hailortcli/infer_stats_printer.hpp +++ b/hailort/hailortcli/infer_stats_printer.hpp @@ -15,19 +15,9 @@ #include -class InferResultsFormatUtils final { -public: - InferResultsFormatUtils() = delete; - - static const uint32_t DEFAULT_FLOATING_POINT_PRECISION = 4; - - static std::string format_statistic(const Expected &statistic, uint32_t precision = DEFAULT_FLOATING_POINT_PRECISION); - static std::string format_statistic(const Expected &statistic); - static double latency_result_to_ms(std::chrono::nanoseconds latency); -}; - class InferStatsPrinter final { public: + static double latency_result_to_ms(std::chrono::nanoseconds latency); static Expected create(const inference_runner_params ¶ms, bool print_running_info = true); void print(const std::vector &network_groups_names, Expected &inference_result); void print_benchmark_csv(InferResult &hw_inference_result, diff --git a/hailort/hailortcli/inference_progress.cpp b/hailort/hailortcli/inference_progress.cpp index ee7d6d05..f514ee0b 100644 --- a/hailort/hailortcli/inference_progress.cpp +++ b/hailort/hailortcli/inference_progress.cpp @@ -142,7 +142,7 @@ std::string NetworkProgressBar::get_progress_text() double avg_hw_latency = 0; auto latency_expected = m_configured_network_group->get_latency_measurement(m_network_name); if (latency_expected) { - avg_hw_latency = InferResultsFormatUtils::latency_result_to_ms(latency_expected.release().avg_hw_latency); + avg_hw_latency = InferStatsPrinter::latency_result_to_ms(latency_expected.release().avg_hw_latency); } if (avg_hw_latency > 0) { diff --git a/hailort/hailortcli/run2/io_wrappers.hpp b/hailort/hailortcli/run2/io_wrappers.hpp index 7a0f1da8..f3337ab2 100644 --- a/hailort/hailortcli/run2/io_wrappers.hpp +++ b/hailort/hailortcli/run2/io_wrappers.hpp @@ -16,12 +16,15 @@ #include 
"common/file_utils.hpp" #include "common/latency_meter.hpp" +#include "hailo/dma_mapped_buffer.hpp" + #include #include using namespace hailort; constexpr uint32_t UNLIMITED_FRAMERATE = 0; +constexpr size_t AMOUNT_OF_OUTPUT_BUFFERS_SYNC_API = 1; #ifndef HAILO_EMULATOR constexpr std::chrono::milliseconds HAILORTCLI_DEFAULT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); @@ -44,21 +47,27 @@ class FramerateThrottle final }; // Wrapper for InputStream or InputVStream objects. -// We use std::enable_from_this because on async api the callback is using `this`. We want to increase the reference -// count until the callback is over. +// We use std::enable_from_this because on async api, we want to increase the ref count of this object until the +// callback is called. It can happen since network_group->shutdown() may be called after this object is being +// destructed. template class WriterWrapper final : public std::enable_shared_from_this> { public: template static Expected> create(Writer &writer, const WriterParams ¶ms, - const LatencyMeterPtr &overall_latency_meter, uint32_t framerate) + VDevice &vdevice, const LatencyMeterPtr &overall_latency_meter, uint32_t framerate, bool async_api) { - auto dataset = create_dataset(writer, params); - CHECK_EXPECTED(dataset); + TRY(auto dataset, create_dataset(writer, params)); + + std::vector dataset_mapped_buffers; + if (async_api) { + TRY(dataset_mapped_buffers, dma_map_dataset(dataset, vdevice)); + } std::shared_ptr wrapper( - new (std::nothrow) WriterWrapper(writer, dataset.release(), overall_latency_meter, framerate)); + new (std::nothrow) WriterWrapper(writer, std::move(dataset), std::move(dataset_mapped_buffers), + overall_latency_meter, framerate)); CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY); return wrapper; @@ -102,10 +111,11 @@ class WriterWrapper final : public std::enable_shared_from_this &&dataset, const LatencyMeterPtr &overall_latency_meter, - uint32_t framerate) : + WriterWrapper(Writer &writer, std::vector &&dataset, std::vector &&dataset_mapped_buffers, + const LatencyMeterPtr &overall_latency_meter, uint32_t framerate) : m_writer(std::ref(writer)), m_dataset(std::move(dataset)), + m_dataset_mapped_buffers(std::move(dataset_mapped_buffers)), m_overall_latency_meter(overall_latency_meter), m_framerate_throttle(framerate) {} @@ -142,6 +152,7 @@ class WriterWrapper final : public std::enable_shared_from_this> create_constant_dataset(size_t frame_size) { const uint8_t const_byte = 0xAB; + auto constant_buffer = Buffer::create_shared(frame_size, const_byte, BufferStorageParams::create_dma()); CHECK_EXPECTED(constant_buffer); @@ -169,9 +180,20 @@ class WriterWrapper final : public std::enable_shared_from_this> dma_map_dataset(const std::vector &dataset, VDevice &vdevice) { + std::vector dataset_mapped_buffers; + for (const auto &buffer : dataset) { + auto mapped_buffer = DmaMappedBuffer::create(vdevice, buffer->data(), buffer->size(), HAILO_DMA_BUFFER_DIRECTION_H2D); + CHECK_EXPECTED(mapped_buffer); + dataset_mapped_buffers.emplace_back(mapped_buffer.release()); + } + return dataset_mapped_buffers; + } + std::reference_wrapper m_writer; std::vector m_dataset; + std::vector m_dataset_mapped_buffers; size_t m_current_buffer_index = 0; LatencyMeterPtr m_overall_latency_meter; @@ -182,20 +204,51 @@ template using WriterWrapperPtr = std::shared_ptr>; // Wrapper for OutputStream or OutputVStream objects. -// We use std::enable_from_this because on async api the callback is using `this`. 
 // Wrapper for OutputStream or OutputVStream objects.
-// We use std::enable_from_this because on async api the callback is using `this`. We want to increase the reference
-count until the callback is over.
+// We use std::enable_shared_from_this because on the async API we want to increase the ref count of this object until
+// the callback is called. This can happen since network_group->shutdown() may be called after this object would
+// otherwise have been destructed.
 template
 class ReaderWrapper final : public std::enable_shared_from_this>
 {
 public:
- static Expected> create(Reader &reader, const LatencyMeterPtr &overall_latency_meter,
- std::shared_ptr net_live_track)
+
+ // Function that gets the amount of output buffers needed for the stream. Templated for both possible types that
+ // ReaderWrapper can wrap - OutputStream and OutputVStream
+
+ // In async mode we create an amount of output buffers equal to async_max_queue_size - we do this because we want
+ // each in-flight read to have its own buffer. (Otherwise it can cause bugs in NMS async mode.)
+ static Expected get_amount_of_output_buffers(OutputStream &output_stream, bool async_api)
 {
- auto buffer = Buffer::create_shared(reader.get_frame_size(), BufferStorageParams::create_dma());
- CHECK_EXPECTED(buffer);
+ if (async_api) {
+ return output_stream.get_async_max_queue_size();
+ } else {
+ return static_cast(AMOUNT_OF_OUTPUT_BUFFERS_SYNC_API);
+ }
+ }
+
+ // VStreams will always be sync, hence 1 output buffer is enough.
+ static Expected get_amount_of_output_buffers(OutputVStream &output_vstream, bool async_api)
+ {
+ (void) output_vstream;
+ (void) async_api;
+ return static_cast(AMOUNT_OF_OUTPUT_BUFFERS_SYNC_API);
+ }
+
+ static Expected> create(Reader &reader, VDevice &vdevice,
+ const LatencyMeterPtr &overall_latency_meter, std::shared_ptr net_live_track, bool async_api)
+ {
+ TRY(const auto amount_of_output_buffers, get_amount_of_output_buffers(reader, async_api));
+
+ TRY(auto output_buffers, create_output_buffers(reader, amount_of_output_buffers));
+
+ std::vector dma_mapped_buffers;
+ if (async_api) {
+ TRY(dma_mapped_buffers, dma_map_output_buffers(vdevice, amount_of_output_buffers, output_buffers));
+ }
 
 std::shared_ptr wrapper(
- new (std::nothrow) ReaderWrapper(reader, buffer.release(), overall_latency_meter, net_live_track));
+ new (std::nothrow) ReaderWrapper(reader, std::move(output_buffers), std::move(dma_mapped_buffers),
+ overall_latency_meter, net_live_track));
 CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY);
 
 return wrapper;
@@ -206,7 +259,7 @@ class ReaderWrapper final : public std::enable_shared_from_this
- return get().wait_for_async_ready(m_buffer->size(), HAILORTCLI_DEFAULT_TIMEOUT);
+ return get().wait_for_async_ready(m_buffer[0]->size(), HAILORTCLI_DEFAULT_TIMEOUT);
 }
 
 hailo_status read_async(typename Reader::TransferDoneCallback callback)
 {
 auto self = std::enable_shared_from_this>::shared_from_this();
- return get().read_async(MemoryView(*m_buffer),
+ return get().read_async(MemoryView(*next_buffer()),
 [self, original=callback](const typename Reader::CompletionInfo &completion_info) {
 original(completion_info);
 if (completion_info.status == HAILO_SUCCESS) {
@@ -233,10 +286,11 @@ class ReaderWrapper final : public std::enable_shared_from_this
- ReaderWrapper(Reader &reader, BufferPtr &&buffer, const LatencyMeterPtr &overall_latency_meter,
- std::shared_ptr net_live_track) :
+ ReaderWrapper(Reader &reader, std::vector &&buffer, std::vector &&mapped_buffer_ptr,
+ const LatencyMeterPtr &overall_latency_meter, std::shared_ptr net_live_track) :
 m_reader(std::ref(reader)),
 m_buffer(std::move(buffer)),
+ m_mapped_buffer_ptr(std::move(mapped_buffer_ptr)),
 m_overall_latency_meter(overall_latency_meter),
 m_net_live_track(net_live_track)
 {}
 
@@ -253,9 +307,51 @@ class ReaderWrapper final : public
std::enable_shared_from_this
 std::reference_wrapper m_reader;
- BufferPtr m_buffer;
+ std::vector m_buffer;
+ std::vector m_mapped_buffer_ptr;
 LatencyMeterPtr m_overall_latency_meter;
 std::shared_ptr m_net_live_track;
+ size_t m_current_buffer_index = 0;
+
+ static Expected> create_output_buffers(Reader &reader, size_t amount_of_output_buffers)
+ {
+ std::vector output_buffers;
+ output_buffers.reserve(amount_of_output_buffers);
+
+ for (size_t i = 0; i < amount_of_output_buffers; i++) {
+ TRY(auto buffer, Buffer::create_shared(reader.get_frame_size(), BufferStorageParams::create_dma()));
+ output_buffers.emplace_back(std::move(buffer));
+ }
+
+ return output_buffers;
+ }
+
+ static Expected> dma_map_output_buffers(VDevice &vdevice, size_t amount_of_output_buffers,
+ const std::vector &output_buffers)
+ {
+ std::vector mapped_output_buffers;
+ mapped_output_buffers.reserve(amount_of_output_buffers);
+
+ for (const auto& output_buffer : output_buffers) {
+ TRY(auto mapped_buffer,
+ DmaMappedBuffer::create(vdevice, output_buffer->data(), output_buffer->size(), HAILO_DMA_BUFFER_DIRECTION_D2H));
+ mapped_output_buffers.emplace_back(std::move(mapped_buffer));
+ }
+
+ return mapped_output_buffers;
+ }
+
+ size_t next_buffer_index()
+ {
+ const auto index = m_current_buffer_index;
+ m_current_buffer_index = (m_current_buffer_index + 1) % m_buffer.size();
+ return index;
+ }
+
+ BufferPtr next_buffer()
+ {
+ return m_buffer[next_buffer_index()];
+ }
 };
 
 template
diff --git a/hailort/hailortcli/run2/network_live_track.cpp b/hailort/hailortcli/run2/network_live_track.cpp
index 0033816c..c9d35ce1 100644
--- a/hailort/hailortcli/run2/network_live_track.cpp
+++ b/hailort/hailortcli/run2/network_live_track.cpp
@@ -75,7 +75,7 @@ uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss)
 if (m_cng) {
 auto hw_latency_measurement = m_cng->get_latency_measurement();
 if (hw_latency_measurement) {
- ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
+ ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferStatsPrinter::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
 } else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it
 ss << fmt::format("{}hw latency: NaN (err)", get_separator());
 }
@@ -83,7 +83,7 @@ uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss)
 else {
 auto hw_latency_measurement = m_configured_infer_model->get_hw_latency_measurement();
 if (hw_latency_measurement) {
- ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
+ ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferStatsPrinter::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
 } else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it
 ss << fmt::format("{}hw latency: NaN (err)", get_separator());
@@ -93,7 +93,7 @@ uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss)
 if (m_overall_latency_meter) {
 auto overall_latency_measurement = m_overall_latency_meter->get_latency(false);
 if (overall_latency_measurement) {
- ss << fmt::format("{}overall latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement));
+ ss << fmt::format("{}overall latency: {:.2f} ms", get_separator(),
InferStatsPrinter::latency_result_to_ms(*overall_latency_measurement)); } else if (HAILO_NOT_AVAILABLE != overall_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it ss << fmt::format("{}overall latency: NaN (err)", get_separator()); @@ -127,13 +127,13 @@ void NetworkLiveTrack::push_json_impl(nlohmann::ordered_json &json) if (m_cng) { auto hw_latency_measurement = m_cng->get_latency_measurement(); if (hw_latency_measurement){ - network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); + network_group_json["hw_latency"] = InferStatsPrinter::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); } } else { auto hw_latency_measurement = m_configured_infer_model->get_hw_latency_measurement(); if (hw_latency_measurement){ - network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); + network_group_json["hw_latency"] = InferStatsPrinter::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); } } @@ -141,7 +141,7 @@ void NetworkLiveTrack::push_json_impl(nlohmann::ordered_json &json) if (m_overall_latency_meter){ auto overall_latency_measurement = m_overall_latency_meter->get_latency(false); if (overall_latency_measurement){ - network_group_json["overall_latency"] = InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement); + network_group_json["overall_latency"] = InferStatsPrinter::latency_result_to_ms(*overall_latency_measurement); } } json["network_groups"].emplace_back(network_group_json); diff --git a/hailort/hailortcli/run2/network_runner.cpp b/hailort/hailortcli/run2/network_runner.cpp index 15a563a2..86845f73 100644 --- a/hailort/hailortcli/run2/network_runner.cpp +++ b/hailort/hailortcli/run2/network_runner.cpp @@ -254,7 +254,7 @@ Expected> NetworkRunner::create_shared(VDevice &v switch (final_net_params.mode) { - case InferenceMode::FULL: + case InferenceMode::FULL_SYNC: { std::map vstreams_params; for (auto &vstream_params : final_net_params.vstream_params) { @@ -263,13 +263,13 @@ Expected> NetworkRunner::create_shared(VDevice &v auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params); CHECK_EXPECTED(vstreams); - auto net_runner = make_shared_nothrow(final_net_params, expected_net_group_name.value(), vdevice, + auto net_runner = make_shared_nothrow(final_net_params, expected_net_group_name.value(), vdevice, std::move(vstreams->first), std::move(vstreams->second), cfgr_net_group); CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); net_runner_ptr = std::static_pointer_cast(net_runner); break; } - case InferenceMode::RAW: // Fallthrough + case InferenceMode::RAW_SYNC: // Fallthrough case InferenceMode::RAW_ASYNC: // Fallthrough case InferenceMode::RAW_ASYNC_SINGLE_THREAD: { @@ -425,10 +425,10 @@ Expected, std::vector>> Netwo } const std::vector NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES{ - {HAILO_SUCCESS, HAILO_STREAM_ABORTED_BY_USER, HAILO_SHUTDOWN_EVENT_SIGNALED} + {HAILO_SUCCESS, HAILO_STREAM_ABORT, HAILO_SHUTDOWN_EVENT_SIGNALED} }; -FullNetworkRunner::FullNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, +FullSyncNetworkRunner::FullSyncNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, std::vector &&input_vstreams, std::vector &&output_vstreams, std::shared_ptr cng) : NetworkRunner(params, name, vdevice, cng), @@ -437,14 +437,15 @@ FullNetworkRunner::FullNetworkRunner(const NetworkParams ¶ms, const 
std::str { } -Expected>> FullNetworkRunner::start_inference_threads(EventPtr shutdown_event, +Expected>> FullSyncNetworkRunner::start_inference_threads(EventPtr shutdown_event, std::shared_ptr net_live_track) { + static const bool SYNC_API = false; std::vector> threads; for (auto &input_vstream : m_input_vstreams) { const auto vstream_params = get_params(input_vstream.name()); - auto writer = WriterWrapper::create(input_vstream, vstream_params, m_overall_latency_meter, - m_params.framerate); + auto writer = WriterWrapper::create(input_vstream, vstream_params, m_vdevice, + m_overall_latency_meter, m_params.framerate, SYNC_API); CHECK_EXPECTED(writer); threads.emplace_back(std::make_unique>("WRITE", @@ -455,8 +456,8 @@ Expected>> FullNetworkRunner::start_inf bool first = true; //TODO: check with multiple outputs for (auto &output_vstream : m_output_vstreams) { - auto reader = ReaderWrapper::create(output_vstream, m_overall_latency_meter, - first ? net_live_track : nullptr); + auto reader = ReaderWrapper::create(output_vstream, m_vdevice, + m_overall_latency_meter, first ? net_live_track : nullptr, SYNC_API); CHECK_EXPECTED(reader); threads.emplace_back(std::make_unique>("READ", @@ -469,12 +470,12 @@ Expected>> FullNetworkRunner::start_inf return threads; } -void FullNetworkRunner::stop() +void FullSyncNetworkRunner::stop() { (void) m_cng->shutdown(); } -std::set FullNetworkRunner::get_input_names() +std::set FullSyncNetworkRunner::get_input_names() { std::set result; @@ -485,7 +486,7 @@ std::set FullNetworkRunner::get_input_names() return result; } -std::set FullNetworkRunner::get_output_names() +std::set FullSyncNetworkRunner::get_output_names() { std::set result; @@ -496,7 +497,7 @@ std::set FullNetworkRunner::get_output_names() return result; } -VStreamParams FullNetworkRunner::get_params(const std::string &name) +VStreamParams FullSyncNetworkRunner::get_params(const std::string &name) { for (const auto ¶ms : m_params.vstream_params) { if (name == params.name) { @@ -552,9 +553,12 @@ Expected FullAsyncNetworkRunner::create_infer_job(const Configure m_overall_latency_meter->add_start_sample(std::chrono::steady_clock::now().time_since_epoch()); } auto job = m_configured_infer_model->run_async(bindings, [=, &inference_status] (const AsyncInferCompletionInfo &completion_info) { + if (HAILO_SUCCESS != completion_info.status) { inference_status = completion_info.status; - LOGGER__ERROR("Failed in infer async request"); + if (HAILO_STREAM_ABORT != completion_info.status) { + LOGGER__ERROR("Failed in infer async request"); + } return; } if (m_overall_latency_meter) { @@ -575,36 +579,6 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut { auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); - std::map inputs_buffer_pool; - const uint8_t const_byte = 0xAB; - for (const auto &input_name : get_input_names()) { - inputs_buffer_pool[input_name] = {}; - auto input_config = m_infer_model->input(input_name); - CHECK_EXPECTED_AS_STATUS(input_config); - - auto params = get_params(input_name); - if (params.input_file_path.empty()) { - auto constant_buffer = Buffer::create(input_config->get_frame_size(), const_byte, BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_STATUS(constant_buffer); - inputs_buffer_pool[input_name] = constant_buffer.release(); - } else { - auto buffer = read_binary_file(params.input_file_path, BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_STATUS(buffer); - inputs_buffer_pool[input_name] = buffer.release(); - } - } - - 
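A note on the create_infer_job change above: HAILO_STREAM_ABORT (previously HAILO_STREAM_ABORTED_BY_USER) marks a deliberate shutdown, so the callback records it without logging an error. Below is a minimal self-contained sketch of that pattern; the enum values are illustrative stand-ins for the real hailo_status codes, and on_completion is a hypothetical helper, not the diff's actual lambda:

#include <atomic>
#include <cstdio>

// Illustrative stand-ins for the real hailo_status values (the numbers are arbitrary).
enum hailo_status { HAILO_SUCCESS = 0, HAILO_STREAM_ABORT = 77 };

// Records the completion status for the inference loop. An abort is the
// expected way to stop streaming, so only other failures are logged.
static void on_completion(hailo_status status, std::atomic<hailo_status> &inference_status)
{
    if (HAILO_SUCCESS != status) {
        inference_status = status;
        if (HAILO_STREAM_ABORT != status) {
            std::fprintf(stderr, "Failed in infer async request, status = %d\n", status);
        }
    }
}

In the diff itself this logic lives inside the run_async completion lambda, with inference_status captured by reference from the inference loop.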
std::map outputs_buffer_pool; - for (const auto &output_name : get_output_names()) { - outputs_buffer_pool[output_name] = {}; - auto output_config = m_infer_model->output(output_name); - CHECK_EXPECTED_AS_STATUS(output_config); - - auto constant_buffer = Buffer::create(output_config->get_frame_size(), 0, BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_STATUS(constant_buffer); - outputs_buffer_pool[output_name] = constant_buffer.release(); - } - std::unique_ptr guard = nullptr; if (HAILO_SCHEDULING_ALGORITHM_NONE != m_params.scheduling_algorithm) { auto status = m_configured_infer_model->set_scheduler_threshold(m_params.scheduler_threshold); @@ -624,24 +598,64 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut auto bindings = m_configured_infer_model->create_bindings(); CHECK_EXPECTED_AS_STATUS(bindings); - for (auto &pair : inputs_buffer_pool) { - auto &name = pair.first; - auto &buffer = pair.second; - bindings->input(name)->set_buffer(hailort::MemoryView(buffer)); + std::unordered_map input_buffers; // Keys are inputs names + std::vector output_buffers; + std::vector dma_mapped_buffers; + + const uint8_t const_byte = 0xAB; + for (const auto &name : get_input_names()) { + auto input_config = m_infer_model->input(name); + CHECK_EXPECTED_AS_STATUS(input_config); + + auto params = get_params(name); + auto buffer = params.input_file_path.empty() ? + Buffer::create(input_config->get_frame_size(), const_byte, BufferStorageParams::create_dma()) : + read_binary_file(params.input_file_path, BufferStorageParams::create_dma()); + CHECK_EXPECTED_AS_STATUS(buffer); + CHECK(0 == (buffer->size() % input_config->get_frame_size()), HAILO_INVALID_ARGUMENT, + "Size of data for input '{}' must be a multiple of the frame size {}. 
Received - {}", name, input_config->get_frame_size(), buffer->size());
+        input_buffers.emplace(name, buffer.release());
+
+        for (uint32_t i = 0; i < (input_buffers.at(name).size() / input_config->get_frame_size()); i++) {
+            auto mapped_buffer = DmaMappedBuffer::create(m_vdevice, input_buffers.at(name).data() + (i * input_config->get_frame_size()),
+                input_config->get_frame_size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
+            CHECK_EXPECTED_AS_STATUS(mapped_buffer);
+            dma_mapped_buffers.emplace_back(mapped_buffer.release());
+        }
     }
-    for (auto &pair : outputs_buffer_pool) {
-        auto &name = pair.first;
-        auto &buffer = pair.second;
-        bindings->output(name)->set_buffer(hailort::MemoryView(buffer));
+
+    for (const auto &name : get_output_names()) {
+        auto output_config = m_infer_model->output(name);
+        CHECK_EXPECTED_AS_STATUS(output_config);
+
+        auto buffer = Buffer::create(output_config->get_frame_size(), 0, BufferStorageParams::create_dma());
+        CHECK_EXPECTED_AS_STATUS(buffer);
+        output_buffers.emplace_back(buffer.release());
+
+        auto mapped_buffer = DmaMappedBuffer::create(m_vdevice, output_buffers.back().data(), output_buffers.back().size(),
+            HAILO_DMA_BUFFER_DIRECTION_D2H);
+        CHECK_EXPECTED_AS_STATUS(mapped_buffer);
+        dma_mapped_buffers.emplace_back(mapped_buffer.release());
+
+        CHECK_SUCCESS(bindings->output(name)->set_buffer(MemoryView(output_buffers.back())));
     }
     FramerateThrottle frame_rate_throttle(m_params.framerate);
     AsyncInferJob last_job;
     auto inference_status = HAILO_SUCCESS;
+    uint32_t frame_id = 0;
     while (HAILO_TIMEOUT == shutdown_event->wait(std::chrono::milliseconds(0)) && (HAILO_SUCCESS == inference_status)) {
         for (uint32_t frames_in_cycle = 0; frames_in_cycle < m_params.batch_size; frames_in_cycle++) {
-            if (HAILO_SUCCESS == m_configured_infer_model->wait_for_async_ready(HAILO_INFINITE_TIMEOUT)) {
+            for (const auto &name : get_input_names()) {
+                auto input_config = m_infer_model->input(name);
+                CHECK_EXPECTED_AS_STATUS(input_config);
+                auto offset = (frame_id % (input_buffers.at(name).size() / input_config->get_frame_size())) * input_config->get_frame_size();
+                CHECK_SUCCESS(bindings->input(name)->set_buffer(MemoryView(input_buffers.at(name).data() + offset,
+                    input_config->get_frame_size())));
+            }
+            frame_id++;
+            if (HAILO_SUCCESS == m_configured_infer_model->wait_for_async_ready(DEFAULT_TRANSFER_TIMEOUT)) {
                 auto job_exp = create_infer_job(*bindings, net_live_track, frame_rate_throttle, inference_status);
                 CHECK_EXPECTED_AS_STATUS(job_exp);
                 last_job = job_exp.release();
@@ -653,6 +667,7 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut
             last_job.wait(HAILO_INFINITE_TIMEOUT);
         }
     }
+    m_configured_infer_model->shutdown();
     last_job.wait(HAILO_INFINITE_TIMEOUT);
     return inference_status;
@@ -674,8 +689,8 @@ Expected>> RawNetworkRunner::start_infe
     std::vector> threads;
     for (auto &input_stream : m_input_streams) {
         const auto stream_params = get_params(input_stream.get().name());
-        auto writer = WriterWrapper::create(input_stream.get(), stream_params, m_overall_latency_meter,
-            m_params.framerate);
+        auto writer = WriterWrapper::create(input_stream.get(), stream_params, m_vdevice,
+            m_overall_latency_meter, m_params.framerate, async_streams);
         CHECK_EXPECTED(writer);
         if (async_streams) {
@@ -693,8 +708,8 @@ Expected>> RawNetworkRunner::start_infe
     bool first = true; //TODO: check with multiple outputs
     for (auto &output_stream : m_output_streams) {
-        auto reader = ReaderWrapper::create(output_stream.get(), m_overall_latency_meter,
-            first ?
net_live_track : nullptr); + auto reader = ReaderWrapper::create(output_stream.get(), m_vdevice, + m_overall_latency_meter, first ? net_live_track : nullptr, async_streams); CHECK_EXPECTED(reader); if (async_streams) { @@ -717,13 +732,15 @@ Expected>> RawNetworkRunner::start_infe hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_event, std::shared_ptr net_live_track) { + static const bool ASYNC_API = true; + // Build output wrappers std::vector> reader_wrappers; std::vector output_semaphores; bool is_first_output = true; for (auto &output_stream : m_output_streams) { - auto reader_wrapper = ReaderWrapper::create(output_stream.get(), m_overall_latency_meter, - is_first_output ? net_live_track : nullptr); + auto reader_wrapper = ReaderWrapper::create(output_stream.get(), m_vdevice, + m_overall_latency_meter, is_first_output ? net_live_track : nullptr, ASYNC_API); CHECK_EXPECTED_AS_STATUS(reader_wrapper); is_first_output = false; @@ -731,9 +748,9 @@ hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_e CHECK_EXPECTED_AS_STATUS(max_queue_size); auto semaphore = Semaphore::create_shared(static_cast(*max_queue_size)); - CHECK_NOT_NULL(semaphore, HAILO_OUT_OF_HOST_MEMORY); + CHECK_EXPECTED_AS_STATUS(semaphore); - output_semaphores.emplace_back(semaphore); + output_semaphores.emplace_back(semaphore.release()); reader_wrappers.emplace_back(reader_wrapper.release()); } @@ -742,16 +759,16 @@ hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_e std::vector input_semaphores; for (auto &input_stream : m_input_streams) { auto writer_wrapper = WriterWrapper::create(input_stream.get(), - get_params(input_stream.get().name()), m_overall_latency_meter, m_params.framerate); + get_params(input_stream.get().name()), m_vdevice, m_overall_latency_meter, m_params.framerate, ASYNC_API); CHECK_EXPECTED_AS_STATUS(writer_wrapper); auto max_queue_size = writer_wrapper.value()->get().get_async_max_queue_size(); CHECK_EXPECTED_AS_STATUS(max_queue_size); auto semaphore = Semaphore::create_shared(static_cast(*max_queue_size)); - CHECK_NOT_NULL(semaphore, HAILO_OUT_OF_HOST_MEMORY); + CHECK_EXPECTED_AS_STATUS(semaphore); - input_semaphores.emplace_back(semaphore); + input_semaphores.emplace_back(semaphore.release()); writer_wrappers.emplace_back(writer_wrapper.release()); } diff --git a/hailort/hailortcli/run2/network_runner.hpp b/hailort/hailortcli/run2/network_runner.hpp index d0d0376f..9601172f 100644 --- a/hailort/hailortcli/run2/network_runner.hpp +++ b/hailort/hailortcli/run2/network_runner.hpp @@ -37,10 +37,10 @@ constexpr std::chrono::milliseconds SYNC_EVENT_TIMEOUT(1000); enum class InferenceMode { - FULL, + FULL_SYNC, FULL_ASYNC, - RAW, + RAW_SYNC, RAW_ASYNC, RAW_ASYNC_SINGLE_THREAD, }; @@ -166,7 +166,7 @@ class NetworkRunner for (auto i = 0; i < m_params.batch_size; i++) { auto status = writer->write(); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -198,7 +198,7 @@ class NetworkRunner for (auto i = 0; i < m_params.batch_size; i++) { auto status = writer->wait_for_async_ready(); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -209,7 +209,7 @@ class NetworkRunner (void)sync_event->signal(); } }); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -243,7 +243,7 @@ class NetworkRunner for (auto i = 
0; i < m_params.batch_size; i++) { auto status = reader->read(); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -275,7 +275,7 @@ class NetworkRunner for (auto i = 0; i < m_params.batch_size; i++) { auto status = reader->wait_for_async_ready(); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -286,7 +286,7 @@ class NetworkRunner (void)sync_event->signal(); } }); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -323,10 +323,10 @@ class NetworkRunner static Expected create_dataset_from_input_file(const std::string &file_path, size_t size); }; -class FullNetworkRunner : public NetworkRunner +class FullSyncNetworkRunner : public NetworkRunner { public: - FullNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, + FullSyncNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, std::vector &&input_vstreams, std::vector &&output_vstreams, std::shared_ptr cng); diff --git a/hailort/hailortcli/run2/run2_command.cpp b/hailort/hailortcli/run2/run2_command.cpp index 6de243e2..4bfdf062 100644 --- a/hailort/hailortcli/run2/run2_command.cpp +++ b/hailort/hailortcli/run2/run2_command.cpp @@ -324,6 +324,7 @@ class Run2 : public CLI::App bool is_ethernet_device() const; void validate_and_set_scheduling_algorithm(); + void validate_mode_supports_service(); std::vector m_network_params; uint32_t m_time_to_run; @@ -346,7 +347,6 @@ class Run2 : public CLI::App std::string m_measure_fw_actions_output_path; }; - Run2::Run2() : CLI::App("Run networks", "run2") { add_measure_fw_actions_subcom(); @@ -354,14 +354,17 @@ Run2::Run2() : CLI::App("Run networks", "run2") add_option("-t,--time-to-run", m_time_to_run, "Time to run (seconds)") ->default_val(DEFAULT_TIME_TO_RUN_SECONDS) ->check(CLI::PositiveNumber); - add_option("-m,--mode", m_mode, "Inference mode") + auto mode = add_option("-m,--mode", m_mode, "Inference mode") ->transform(HailoCheckedTransformer({ - { "full", InferenceMode::FULL }, + { "full_sync", InferenceMode::FULL_SYNC }, + { "full", InferenceMode::FULL_SYNC, OptionVisibility::HIDDEN }, // TODO: Remove option { "full_async", InferenceMode::FULL_ASYNC }, - { "raw", InferenceMode::RAW }, + { "raw_sync", InferenceMode::RAW_SYNC }, + { "raw", InferenceMode::RAW_SYNC, OptionVisibility::HIDDEN }, // TODO: Remove option { "raw_async", InferenceMode::RAW_ASYNC }, { "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD, OptionVisibility::HIDDEN } - }))->default_val("full"); + }))->default_val("full_sync"); + add_option("-j,--json", m_stats_json_path, "If set save statistics as json to the specified path") ->default_val("") ->check(FileSuffixValidator(JSON_SUFFIX)); @@ -412,8 +415,12 @@ Run2::Run2() : CLI::App("Run networks", "run2") // When working with service over ip - client doesn't have access to physical devices } + hailo_deprecate_options(this, { std::make_shared(mode, "full", "full_sync"), + std::make_shared(mode, "raw", "raw_sync") }, false); + parse_complete_callback([this]() { validate_and_set_scheduling_algorithm(); + validate_mode_supports_service(); }); } @@ -578,6 +585,14 @@ bool Run2::is_ethernet_device() const return is_valid_ip(m_device_ids[0]); } +void Run2::validate_mode_supports_service() +{ + if (m_multi_process_service) { + PARSE_CHECK(((InferenceMode::FULL_SYNC == m_mode) || 
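// Raw stream modes bypass the vstream pipeline and are not available over the multi-process service.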
(InferenceMode::FULL_ASYNC == m_mode)), + "When running multi-process, only FULL_SYNC or FULL_ASYNC modes are allowed"); + } +} + void Run2::validate_and_set_scheduling_algorithm() { if (m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE) { @@ -617,9 +632,9 @@ static hailo_status wait_for_threads(std::vector> & auto last_error_status = HAILO_SUCCESS; for (auto& thread : threads) { auto thread_status = thread->get(); - if ((HAILO_SUCCESS != thread_status) && (HAILO_STREAM_ABORTED_BY_USER != thread_status)) { + if ((HAILO_SUCCESS != thread_status) && (HAILO_STREAM_ABORT != thread_status)) { last_error_status = thread_status; - LOGGER__ERROR("Thread failed with with status {}", thread_status); + LOGGER__ERROR("Thread failed with status {}", thread_status); } } return last_error_status; @@ -628,12 +643,12 @@ static hailo_status wait_for_threads(std::vector> & std::string get_str_infer_mode(const InferenceMode& infer_mode) { switch(infer_mode){ - case InferenceMode::FULL: - return "full"; + case InferenceMode::FULL_SYNC: + return "full_sync"; case InferenceMode::FULL_ASYNC: return "full_async"; - case InferenceMode::RAW: - return "raw"; + case InferenceMode::RAW_SYNC: + return "raw_sync"; case InferenceMode::RAW_ASYNC: return "raw_async"; case InferenceMode::RAW_ASYNC_SINGLE_THREAD: @@ -682,8 +697,8 @@ Expected> Run2::create_vdevice() CHECK_AS_EXPECTED(!get_multi_process_service(), HAILO_INVALID_OPERATION, "Collecting runtime data is not supported with multi process service"); CHECK_AS_EXPECTED(get_device_count() == 1, HAILO_INVALID_OPERATION, "Collecting runtime data is not supported with multi device"); CHECK_AS_EXPECTED(!(get_measure_hw_latency() || get_measure_overall_latency()), HAILO_INVALID_OPERATION, "Latency measurement is not allowed when collecting runtime data"); - CHECK_AS_EXPECTED((get_mode() == InferenceMode::RAW) || (get_mode() == InferenceMode::RAW_ASYNC), HAILO_INVALID_OPERATION, - "'measure-fw-actions' is only supported with '--mode=raw'. Received mode: '{}'", get_str_infer_mode(get_mode())); + CHECK_AS_EXPECTED((get_mode() == InferenceMode::RAW_SYNC) || (get_mode() == InferenceMode::RAW_ASYNC), HAILO_INVALID_OPERATION, + "'measure-fw-actions' is only supported with '--mode=raw_sync' or '--mode=raw_async'. Received mode: '{}'", get_str_infer_mode(get_mode())); } vdevice_params.group_id = get_group_id().c_str(); @@ -725,6 +740,8 @@ Expected>> Run2::init_and_run_net_run auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); + activation_barrier.arrive_and_wait(); + if (get_measure_power() || get_measure_current() || get_measure_temp()) { auto physical_devices = vdevice->get_physical_devices(); CHECK_EXPECTED(physical_devices); @@ -732,17 +749,12 @@ Expected>> Run2::init_and_run_net_run for (auto &device : physical_devices.value()) { auto measurement_live_track = MeasurementLiveTrack::create_shared(device.get(), get_measure_power(), get_measure_current(), get_measure_temp()); - if (HAILO_SUCCESS != measurement_live_track.status()) { - activation_barrier.terminate(); - } CHECK_EXPECTED(measurement_live_track); live_stats->add(measurement_live_track.release(), 2); } } - // TODO: wait for all nets before starting timer. start() should update TimerLiveTrack to start. or maybe append here but first in vector... 
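The mode renaming in run2_command.cpp above keeps the old CLI spellings as hidden, deprecated aliases so existing invocations keep working. Conceptually the transformer reduces to a name-to-enum lookup table; a simplified sketch follows (the real code goes through HailoCheckedTransformer and hailo_deprecate_options, which also emits the user-facing deprecation notice):

#include <map>
#include <string>

enum class InferenceMode { FULL_SYNC, FULL_ASYNC, RAW_SYNC, RAW_ASYNC, RAW_ASYNC_SINGLE_THREAD };

// "full" and "raw" are deprecated spellings kept for backwards compatibility;
// they resolve to the same modes as "full_sync" and "raw_sync".
static const std::map<std::string, InferenceMode> MODE_BY_NAME = {
    { "full_sync",               InferenceMode::FULL_SYNC },
    { "full",                    InferenceMode::FULL_SYNC },  // deprecated alias
    { "full_async",              InferenceMode::FULL_ASYNC },
    { "raw_sync",                InferenceMode::RAW_SYNC },
    { "raw",                     InferenceMode::RAW_SYNC },   // deprecated alias
    { "raw_async",               InferenceMode::RAW_ASYNC },
    { "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD }, // hidden option
};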
- activation_barrier.arrive_and_wait(); CHECK_SUCCESS_AS_EXPECTED(live_stats->start()); auto status = shutdown_event->wait(get_time_to_run()); if (HAILO_TIMEOUT != status) { diff --git a/hailort/hailortcli/run_command.cpp b/hailort/hailortcli/run_command.cpp index 63edf7c8..55b62a19 100644 --- a/hailort/hailortcli/run_command.cpp +++ b/hailort/hailortcli/run_command.cpp @@ -401,7 +401,7 @@ hailo_status send_loop(const inference_runner_params ¶ms, SendObject &send_o auto status = send_object.write(MemoryView( const_cast(input_buffer->data()) + offset, send_object.get_frame_size())); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__DEBUG("Input stream was aborted!"); return status; } @@ -692,7 +692,7 @@ static hailo_status run_streaming_impl(std::shared_ptr c auto error_status = HAILO_SUCCESS; for (auto& result : results) { auto status = result->get(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { continue; } if (HAILO_SUCCESS != status) { diff --git a/hailort/hailortcli/udp_rate_limiter_command.cpp b/hailort/hailortcli/udp_rate_limiter_command.cpp index b1978af6..bbcb0517 100644 --- a/hailort/hailortcli/udp_rate_limiter_command.cpp +++ b/hailort/hailortcli/udp_rate_limiter_command.cpp @@ -18,7 +18,7 @@ #define PORTS_COUNT (16) // Should be same as HW_PACKAGE__CORE_PKG__N_AXIS_IN UdpRateLimiterCommand::UdpRateLimiterCommand (CLI::App &parent_app) : - Command(parent_app.add_subcommand("udp-rate-limiter", "Limit UDP rate")) + Command(parent_app.add_subcommand("udp-rate-limiter", "Limit the UDP rate")) { m_set_command = m_app->add_subcommand("set", "Sets the udp rate limit"); m_set_command->add_option("--kbit-rate", m_rate_kbit_sec, "rate in Kbit/s") diff --git a/hailort/libhailort/CMakeLists.txt b/hailort/libhailort/CMakeLists.txt index b44ab0e3..89df1ba4 100644 --- a/hailort/libhailort/CMakeLists.txt +++ b/hailort/libhailort/CMakeLists.txt @@ -2,23 +2,24 @@ cmake_minimum_required(VERSION 3.0.0) # set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*") set(HAILORT_MAJOR_VERSION 4) -set(HAILORT_MINOR_VERSION 16) -set(HAILORT_REVISION_VERSION 2) +set(HAILORT_MINOR_VERSION 17) +set(HAILORT_REVISION_VERSION 0) # Add the cmake folder so the modules there are found set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) # Generate hef-proto files using host protobuf -protobuf_generate_cpp(PROTO_HEF_SRC PROTO_HEF_HEADR hef.proto) +protobuf_generate_cpp(PROTO_HEF_SRC PROTO_HEF_HEADER hef.proto) +protobuf_generate_python(PROTO_HEF_PY hef.proto) # TODO (HRT-12504): Copy hef_pb2.py to tools directory -add_library(hef_proto ${PROTO_HEF_SRC} ${PROTO_HEF_HEADR}) +add_library(hef_proto ${PROTO_HEF_SRC} ${PROTO_HEF_HEADER} ${PROTO_HEF_PY}) target_link_libraries(hef_proto libprotobuf-lite) set_target_properties(hef_proto PROPERTIES CXX_STANDARD 14 GENERATED TRUE POSITION_INDEPENDENT_CODE ON) if(CMAKE_HOST_WIN32) # https://github.com/protocolbuffers/protobuf/tree/master/cmake#notes-on-compiler-warnings target_compile_options(hef_proto PRIVATE /wd4244) endif() -get_filename_component(PROTO_HEADER_DIRECTORY ${PROTO_HEF_HEADR} DIRECTORY) +get_filename_component(PROTO_HEADER_DIRECTORY ${PROTO_HEF_HEADER} DIRECTORY) target_include_directories(hef_proto PUBLIC $ diff --git a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt index abe92849..b69addd1 100644 --- a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt +++ 
b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt @@ -8,7 +8,7 @@ if(NOT CMAKE_HOST_UNIX) message(FATAL_ERROR "Only unix hosts are supported, stopping build") endif() -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) # GST_PLUGIN_DEFINE needs PACKAGE to be defined set(GST_HAILO_PACKAGE_NAME "hailo") @@ -19,13 +19,14 @@ pkg_search_module(GLIB REQUIRED glib-2.0) pkg_search_module(GSTREAMER REQUIRED gstreamer-1.0) pkg_search_module(GSTREAMER_BASE REQUIRED gstreamer-base-1.0) pkg_search_module(GSTREAMER_VIDEO REQUIRED gstreamer-video-1.0) +pkg_search_module(GSTREAMER_PLUGINS_BASE REQUIRED gstreamer-plugins-base-1.0) add_library(gsthailo SHARED gst-hailo/gsthailoplugin.cpp + gst-hailo/sync_gsthailonet.cpp + gst-hailo/sync_gst_hailosend.cpp + gst-hailo/sync_gst_hailorecv.cpp gst-hailo/gsthailonet.cpp - gst-hailo/gsthailosend.cpp - gst-hailo/gsthailorecv.cpp - gst-hailo/gsthailonet2.cpp gst-hailo/gsthailodevicestats.cpp gst-hailo/common.cpp gst-hailo/network_group_handle.cpp @@ -51,7 +52,7 @@ target_compile_options(gsthailo PRIVATE -DPACKAGE="${GST_HAILO_PACKAGE_NAME}") target_include_directories(gsthailo PRIVATE ${GSTREAMER_VIDEO_INCLUDE_DIRS}) -target_link_libraries(gsthailo HailoRT::libhailort ${GSTREAMER_VIDEO_LDFLAGS}) +target_link_libraries(gsthailo HailoRT::libhailort ${GSTREAMER_VIDEO_LDFLAGS} -lgstallocators-1.0) install(TARGETS gsthailo LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp index d4c64216..e1508af7 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or @@ -18,498 +18,692 @@ * Boston, MA 02110-1301, USA. 
*/ #include "gsthailonet.hpp" -#include "gsthailosend.hpp" -#include "gsthailorecv.hpp" -#include "hailo_events/hailo_events.hpp" -#include "metadata/hailo_buffer_flag_meta.hpp" +#include "metadata/tensor_meta.hpp" +#include "hailo/buffer.hpp" #include "hailo/hailort_common.hpp" #include "hailo/hailort_defaults.hpp" -#include +#include #include +#include -GST_DEBUG_CATEGORY_STATIC(gst_hailonet_debug_category); -#define GST_CAT_DEFAULT gst_hailonet_debug_category - -constexpr std::chrono::milliseconds WAIT_FOR_FLUSH_TIMEOUT_MS(1000); - -static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); -static void gst_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec); -static gboolean gst_hailorecv_src_pad_event(GstPad *pad, GstObject *parent, GstEvent *event); -static GstPadProbeReturn gst_hailonet_sink_probe(GstPad *pad, GstPadProbeInfo *info, gpointer user_data); -static GstStateChangeReturn gst_hailonet_change_state(GstElement *element, GstStateChange transition); -static void gst_hailonet_flush_callback(GstHailoNet *hailonet, gpointer data); -static void gst_hailonet_inner_queue_overrun_callback(GstElement *queue, gpointer udata); -static void gst_hailonet_inner_queue_underrun_callback(GstElement *queue, gpointer udata); +#define WAIT_FOR_ASYNC_READY_TIMEOUT (std::chrono::milliseconds(10000)) +#define ERROR(msg, ...) g_print(msg, ##__VA_ARGS__) enum { PROP_0, - PROP_DEBUG, - PROP_DEVICE_ID, PROP_HEF_PATH, - PROP_NETWORK_NAME, PROP_BATCH_SIZE, + PROP_DEVICE_ID, + PROP_DEVICE_COUNT, + PROP_VDEVICE_GROUP_ID, + PROP_IS_ACTIVE, PROP_OUTPUTS_MIN_POOL_SIZE, PROP_OUTPUTS_MAX_POOL_SIZE, - PROP_IS_ACTIVE, - PROP_DEVICE_COUNT, - PROP_VDEVICE_KEY, PROP_SCHEDULING_ALGORITHM, PROP_SCHEDULER_TIMEOUT_MS, PROP_SCHEDULER_THRESHOLD, PROP_SCHEDULER_PRIORITY, - PROP_MULTI_PROCESS_SERVICE, PROP_INPUT_FORMAT_TYPE, PROP_OUTPUT_FORMAT_TYPE, PROP_NMS_SCORE_THRESHOLD, PROP_NMS_IOU_THRESHOLD, PROP_NMS_MAX_PROPOSALS_PER_CLASS, + PROP_INPUT_FROM_META, + PROP_NO_TRANSFORM, + PROP_MULTI_PROCESS_SERVICE, + PROP_PASS_THROUGH, + PROP_FORCE_WRITABLE, + + // Deprecated + PROP_VDEVICE_KEY, }; -G_DEFINE_TYPE(GstHailoNet, gst_hailonet, GST_TYPE_BIN); +static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); +static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); -static void gst_hailonet_class_init(GstHailoNetClass *klass) +G_DEFINE_TYPE (GstHailoAllocator, gst_hailo_allocator, GST_TYPE_ALLOCATOR); +G_DEFINE_TYPE (GstHailoNet, gst_hailonet, GST_TYPE_ELEMENT); + +static std::atomic_uint32_t hailonet_count(0); + +static bool gst_hailo_should_use_dma_buffers() { - GObjectClass *gobject_class = G_OBJECT_CLASS(klass); - GstElementClass *element_class = GST_ELEMENT_CLASS(klass); + const char *env = g_getenv(GST_HAILO_USE_DMA_BUFFER_ENV_VAR); + return (nullptr != env) && (0 == g_strcmp0(env, "1")); +} - GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template)); +static GstMemory *gst_hailo_allocator_alloc(GstAllocator* allocator, gsize size, GstAllocationParams* /*params*/) { + GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); + auto buffer = Buffer::create(size, BufferStorageParams::create_dma()); + if (!buffer) { + ERROR("Creating 
buffer for allocator has failed, status = %d\n", buffer.status()); + return nullptr; + } - GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&sink_template)); + GstMemory *memory = gst_memory_new_wrapped(static_cast(0), buffer->data(), + buffer->size(), 0, buffer->size(), nullptr, nullptr); + if (nullptr == memory) { + ERROR("Creating new GstMemory for allocator has failed!\n"); + return nullptr; + } - gst_element_class_set_static_metadata(element_class, - "hailonet element", "Hailo/Network", - "Configure and Activate Hailo Network. " - "Supports the \"flush\" signal which blocks until there are no buffers currently processesd in the element. " - "When deactivating a hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " - "hailonet, since this operation waits until there are no frames coming in.", - PLUGIN_AUTHOR); + hailo_allocator->buffers[memory] = std::move(buffer.release()); + return memory; +} - element_class->change_state = GST_DEBUG_FUNCPTR(gst_hailonet_change_state); - - gobject_class->set_property = gst_hailonet_set_property; - gobject_class->get_property = gst_hailonet_get_property; - g_object_class_install_property(gobject_class, PROP_DEBUG, - g_param_spec_boolean("debug", "Debug flag", "Should print debug information", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_DEVICE_ID, - g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, - g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, - std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, - g_param_spec_uint("vdevice-key", - "Indicate whether to re-use or re-create vdevice", - "Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet." \ - "if multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", - MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_HEF_PATH, - g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", NULL, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_NETWORK_NAME, - g_param_spec_string("net-name", "Network Name", - "Configure and run this specific network. 
" - "If not passed, configure and run the default network - ONLY if there is one network in the HEF!", NULL, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, - g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, - g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", - 0, std::numeric_limits::max(), DEFAULT_OUTPUTS_MIN_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, - g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", - "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), - DEFAULT_OUTPUTS_MAX_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, - g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " - "By default, the hailonet element will not be active unless it is the only one. " - "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +static void gst_hailo_allocator_free(GstAllocator* allocator, GstMemory *mem) { + GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); + hailo_allocator->buffers.erase(mem); +} - g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, - g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " - "Gets values from the enum GstHailoSchedulingAlgorithms. " - "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " - "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. 
", - GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, - g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," - " as long as at least one send request has been sent.", - HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, - g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", - HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, - g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. " - "Bigger number represent higher priority", - HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, - g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " - "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", - HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, - g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." - "Gets values from the enum GstHailoFormatType. ", - GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, - g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." - "Gets values from the enum GstHailoFormatType. ", - GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, - g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. 
Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +static void gst_hailo_allocator_class_init(GstHailoAllocatorClass* klass) { + GstAllocatorClass* allocator_class = GST_ALLOCATOR_CLASS(klass); - // See information about the "flush" signal in the element description - g_signal_new( - "flush", - GST_TYPE_HAILONET, - G_SIGNAL_ACTION, - 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 - ); + allocator_class->alloc = gst_hailo_allocator_alloc; + allocator_class->free = gst_hailo_allocator_free; } -std::string create_name(std::string prefix, uint32_t id) +static void gst_hailo_allocator_init(GstHailoAllocator* allocator) { + allocator->buffers = std::unordered_map(); +} + +static hailo_status gst_hailonet_deconfigure(GstHailoNet *self) { - return prefix + std::to_string(id); + // This will wakeup any blocking calls to deuque + for (auto &name_pool_pair : self->output_buffer_pools) { + gst_buffer_pool_set_flushing(name_pool_pair.second, TRUE); + } + + std::unique_lock lock(self->infer_mutex); + self->configured_infer_model.reset(); + self->is_configured = false; + return HAILO_SUCCESS; } -Expected> HailoNetImpl::create(GstHailoNet *element) +static hailo_status gst_hailonet_free(GstHailoNet *self) { - if (nullptr == element) { - return make_unexpected(HAILO_INVALID_ARGUMENT); + std::unique_lock lock(self->infer_mutex); + self->configured_infer_model.reset(); + self->infer_model.reset(); + self->vdevice.reset(); + + { + std::unique_lock lock(self->thread_queue_mutex); + self->is_thread_running = false; } + self->thread_cv.notify_all(); - auto hailosend_name = create_name("hailosend", HailoNetImpl::m_hailonet_count); - GstElement *hailosend = gst_element_factory_make("hailosend", hailosend_name.c_str()); - if (nullptr == hailosend) { - GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating hailosend element in bin!"), (NULL)); - return make_unexpected(HAILO_INTERNAL_FAILURE); + if (self->thread.joinable()) { + self->thread.join(); } - g_object_set(hailosend, "qos", FALSE, NULL); + if (nullptr != self->input_queue) { + gst_queue_array_free(self->input_queue); + } - auto hailoqueue_name = create_name("hailoqueue", HailoNetImpl::m_hailonet_count); - GstElement *queue = gst_element_factory_make("queue", hailoqueue_name.c_str()); - if (nullptr == queue) { - GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating queue element in bin!"), (NULL)); - gst_object_unref(hailosend); - return make_unexpected(HAILO_INTERNAL_FAILURE); + if (nullptr != self->thread_queue) { + gst_queue_array_free(self->thread_queue); } - // Passing 0 disables the features here - g_object_set(queue, "max-size-time", (guint64)0, NULL); - g_object_set(queue, "max-size-bytes", (guint)0, NULL); - g_signal_connect(queue, "overrun", G_CALLBACK(gst_hailonet_inner_queue_overrun_callback), nullptr); - g_signal_connect(queue, "underrun", G_CALLBACK(gst_hailonet_inner_queue_underrun_callback), nullptr); + if (nullptr != self->input_caps) { + gst_caps_unref(self->input_caps); + } - auto hailorecv_name = create_name("hailorecv", HailoNetImpl::m_hailonet_count); - GstElement *hailorecv = gst_element_factory_make("hailorecv", hailorecv_name.c_str()); - if (nullptr == hailorecv) { - GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating hailorecv element in bin!"), (NULL)); - gst_object_unref(hailosend); - gst_object_unref(queue); - return make_unexpected(HAILO_INTERNAL_FAILURE); + for (auto &name_pool_pair : self->output_buffer_pools) { + gboolean result = 
gst_buffer_pool_set_active(name_pool_pair.second, FALSE); + CHECK(result, HAILO_INTERNAL_FAILURE, "Could not release buffer pool"); + gst_object_unref(name_pool_pair.second); + } + if (gst_hailo_should_use_dma_buffers()) { + gst_object_unref(self->dma_allocator); + } else { + gst_object_unref(self->allocator); } - g_object_set(hailorecv, "qos", FALSE, NULL); + self->props.free_strings(); - g_signal_connect(element, "flush", G_CALLBACK(gst_hailonet_flush_callback), nullptr); + return HAILO_SUCCESS; +} - auto was_flushed_event = Event::create_shared(Event::State::not_signalled); - GST_CHECK_EXPECTED(was_flushed_event, element, RESOURCE, "Failed allocating memory for event!"); +static hailo_status gst_hailonet_set_format_types(GstHailoNet *self, std::shared_ptr infer_model) +{ + if (self->props.m_input_format_type.was_changed()) { + for (const auto &input_name : infer_model->get_input_names()) { + auto input = infer_model->input(input_name); + CHECK_EXPECTED_AS_STATUS(input); - auto ptr = make_unique_nothrow(element, hailosend, queue, hailorecv, was_flushed_event.release()); - if (nullptr == ptr) { - return make_unexpected(HAILO_OUT_OF_HOST_MEMORY); + input->set_format_type(self->props.m_input_format_type.get()); + } } + if (self->props.m_output_format_type.was_changed()) { + for (const auto &output_name : infer_model->get_output_names()) { + auto output = infer_model->output(output_name); + CHECK_EXPECTED_AS_STATUS(output); - return ptr; + output->set_format_type(self->props.m_output_format_type.get()); + } + } + + return HAILO_SUCCESS; } -std::atomic_uint32_t HailoNetImpl::m_hailonet_count(0); -std::mutex HailoNetImpl::m_mutex; -HailoNetImpl::HailoNetImpl(GstHailoNet *element, GstElement *hailosend, GstElement *queue, GstElement *hailorecv, EventPtr was_flushed_event) : - m_element(element), m_props(), m_output_formats(), m_hailosend(hailosend), m_queue(queue), m_hailorecv(hailorecv), - m_net_group_handle(nullptr), m_was_configured(false), m_has_called_activate(false), - m_was_flushed_event(was_flushed_event), m_pool(nullptr) +static hailo_status gst_hailonet_set_nms_params(GstHailoNet *self, std::shared_ptr infer_model) { - GST_DEBUG_CATEGORY_INIT(gst_hailonet_debug_category, "hailonet", 0, "debug category for hailonet element"); + // Check that if one of the NMS params are changed, we have NMS outputs in the model + auto has_nms_output = std::any_of(infer_model->outputs().begin(), infer_model->outputs().end(), [](const auto &output) + { + return output.is_nms(); + }); + + for (const auto &output_name : infer_model->get_output_names()) { + auto output = infer_model->output(output_name); + CHECK_EXPECTED_AS_STATUS(output); - /* gst_bin_add_many cannot fail. 
I use this function because the elements are created here and does not come from the outside so, - * gst_bin_add will not fail */ - gst_bin_add_many(GST_BIN(m_element), m_hailosend, m_queue, m_hailorecv, NULL); - init_ghost_sink(); - init_ghost_src(); + if (self->props.m_nms_score_threshold.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS score threshold is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_score_threshold(self->props.m_nms_score_threshold.get()); + } + } + if (self->props.m_nms_iou_threshold.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS IoU threshold is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_iou_threshold(self->props.m_nms_iou_threshold.get()); + } + } + if (self->props.m_nms_max_proposals_per_class.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS max proposals per class is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_max_proposals_per_class(self->props.m_nms_max_proposals_per_class.get()); + } + } + } - ++m_hailonet_count; + return HAILO_SUCCESS; } -HailoNetImpl::~HailoNetImpl() +static hailo_status gst_hailonet_set_scheduler_params(GstHailoNet *self, std::shared_ptr configured_infer_model) { - if (nullptr != m_pool) { - (void)gst_buffer_pool_set_active(m_pool, FALSE); + if (self->props.m_scheduler_timeout_ms.was_changed()) { + auto millis = std::chrono::milliseconds(self->props.m_scheduler_timeout_ms.get()); + auto status = configured_infer_model->set_scheduler_timeout(millis); + CHECK_SUCCESS(status, "Setting scheduler timeout failed, status = %d", status); } + if (self->props.m_scheduler_threshold.was_changed()) { + auto status = configured_infer_model->set_scheduler_threshold(self->props.m_scheduler_threshold.get()); + CHECK_SUCCESS(status, "Setting scheduler threshold failed, status = %d", status); + } + if (self->props.m_scheduler_priority.was_changed()) { + auto status = configured_infer_model->set_scheduler_priority(self->props.m_scheduler_priority.get()); + CHECK_SUCCESS(status, "Setting scheduler priority failed, status = %d", status); + } + + return HAILO_SUCCESS; } -void HailoNetImpl::init_ghost_sink() +static Expected gst_hailonet_create_buffer_pool(GstHailoNet *self, size_t frame_size) { - GstPad *pad = gst_element_get_static_pad(m_hailosend, "sink"); + GstBufferPool *pool = gst_buffer_pool_new(); + + GstStructure *config = gst_buffer_pool_get_config(pool); + gst_buffer_pool_config_set_params(config, nullptr, static_cast(frame_size), self->props.m_outputs_min_pool_size.get(), + self->props.m_outputs_max_pool_size.get()); - GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - GstPadTemplate *pad_tmpl = gst_static_pad_template_get(&sink_template); + if (gst_hailo_should_use_dma_buffers()) { + gst_buffer_pool_config_set_allocator(config, self->dma_allocator, nullptr); + } else { + gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->allocator), nullptr); + } - GstPad *ghost_pad = gst_ghost_pad_new_from_template("sink", pad, pad_tmpl); - gst_pad_set_active(ghost_pad, TRUE); - gst_element_add_pad(GST_ELEMENT(m_element), ghost_pad); + gboolean result = gst_buffer_pool_set_config(pool, config); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set config buffer pool"); - gst_pad_add_probe(pad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_hailonet_sink_probe), 
nullptr, nullptr); + result = gst_buffer_pool_set_active(pool, TRUE); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set buffer pool as active"); - gst_object_unref(pad_tmpl); - gst_object_unref(pad); + return pool; } -void HailoNetImpl::init_ghost_src() +static hailo_status gst_hailonet_configure(GstHailoNet *self) { - GstPad *pad = gst_element_get_static_pad(m_hailorecv, "src"); + if (self->is_configured) { + return HAILO_SUCCESS; + } + + for (auto &name_pool_pair : self->output_buffer_pools) { + gst_buffer_pool_set_flushing(name_pool_pair.second, FALSE); + } + + self->infer_model->set_batch_size(self->props.m_batch_size.get()); + + auto status = gst_hailonet_set_format_types(self, self->infer_model); + CHECK_SUCCESS(status); + + status = gst_hailonet_set_nms_params(self, self->infer_model); + CHECK_SUCCESS(status); + + // In RGB formats, Gstreamer is padding each row to 4. + for (const auto &input_name : self->infer_model->get_input_names()) { + if(self->props.m_no_transform.get()) { + // In case transformation is disabled - format order will be the same as we get from the HW (stream info). + auto input_stream_infos = self->infer_model->hef().get_stream_info_by_name(input_name, HAILO_H2D_STREAM); + CHECK_EXPECTED_AS_STATUS(input_stream_infos); + self->infer_model->input(input_name)->set_format_order(input_stream_infos.value().format.order); + } else if (self->infer_model->input(input_name)->format().order == HAILO_FORMAT_ORDER_NHWC) { + self->infer_model->input(input_name)->set_format_order(HAILO_FORMAT_ORDER_RGB4); + } + } + + if (self->props.m_no_transform.get()) { + for (const auto &output_name : self->infer_model->get_output_names()) { + // In case transformation is disabled - format order will be the same as we get from the HW (stream info). 
+ auto output_stream_infos = self->infer_model->hef().get_stream_info_by_name(output_name, HAILO_D2H_STREAM); + CHECK_EXPECTED_AS_STATUS(output_stream_infos); + self->infer_model->output(output_name)->set_format_order(output_stream_infos.value().format.order); + } + } - GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - GstPadTemplate *pad_tmpl = gst_static_pad_template_get(&src_template); + auto configured_infer_model = self->infer_model->configure(); + CHECK_EXPECTED_AS_STATUS(configured_infer_model); - GstPad *ghost_pad = gst_ghost_pad_new_from_template("src", pad, pad_tmpl); - gst_pad_set_active(ghost_pad, TRUE); - gst_element_add_pad(GST_ELEMENT(m_element), ghost_pad); + auto ptr = make_shared_nothrow(configured_infer_model.release()); + CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); + self->configured_infer_model = ptr; - gst_pad_set_event_function(pad, gst_hailorecv_src_pad_event); + status = gst_hailonet_set_scheduler_params(self, self->configured_infer_model); + CHECK_SUCCESS(status); - gst_object_unref(pad_tmpl); - gst_object_unref(pad); + self->is_configured = true; + return HAILO_SUCCESS; } -void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +static hailo_status gst_hailonet_allocate_infer_resources(GstHailoNet *self) { - GST_DEBUG_OBJECT(m_element, "set_property"); + auto bindings = self->configured_infer_model->create_bindings(); + CHECK_EXPECTED_AS_STATUS(bindings); + self->infer_bindings = std::move(bindings.release()); + + self->output_buffer_pools = std::unordered_map(); + self->output_vstream_infos = std::unordered_map(); + + auto async_queue_size = self->configured_infer_model->get_async_queue_size(); + CHECK_EXPECTED_AS_STATUS(async_queue_size); + self->input_queue = gst_queue_array_new(static_cast(async_queue_size.value())); + self->thread_queue = gst_queue_array_new(static_cast(async_queue_size.value())); + self->is_thread_running = true; + self->thread = std::thread([self] () { + while (self->is_thread_running) { + GstBuffer *buffer = nullptr; + { + std::unique_lock lock(self->thread_queue_mutex); + self->thread_cv.wait(lock, [self] () { + return (self->buffers_in_thread_queue > 0) || !self->is_thread_running; + }); + if (!self->is_thread_running) { + break; + } - if ((object == nullptr) || (value == nullptr) || (pspec == nullptr)) { - g_error("set_property got null parameter!"); - return; + buffer = static_cast(gst_queue_array_pop_head(self->thread_queue)); + self->buffers_in_thread_queue--; + } + self->thread_cv.notify_all(); + if (GST_IS_PAD(self->srcpad)) { // Checking because we fail here when exiting the application + GstFlowReturn ret = gst_pad_push(self->srcpad, buffer); + if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && (!self->has_got_eos)) { + ERROR("gst_pad_push failed with status = %d\n", ret); + break; + } + } + } + }); + + for (auto &output : self->infer_model->outputs()) { + auto buffer_pool = gst_hailonet_create_buffer_pool(self, output.get_frame_size()); + CHECK_EXPECTED_AS_STATUS(buffer_pool); + + self->output_buffer_pools[output.name()] = buffer_pool.release(); } - switch (property_id) { - case PROP_DEBUG: + auto vstream_infos = self->infer_model->hef().get_output_vstream_infos(); + CHECK_EXPECTED_AS_STATUS(vstream_infos); + + for (const auto &vstream_info : vstream_infos.value()) { + self->output_vstream_infos[vstream_info.name] = vstream_info; + } + + return HAILO_SUCCESS; +} + +static GstStateChangeReturn 
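// State transitions handled below:
//   PAUSED  -> PLAYING : gst_hailonet_configure() builds the configured infer model
//   PLAYING -> PAUSED  : gst_hailonet_deconfigure() flushes the output pools and drops it
//   READY   -> NULL    : gst_hailonet_free() joins the push thread and releases all resources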
gst_hailonet_change_state(GstElement *element, GstStateChange transition) +{ + GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_hailonet_parent_class)->change_state(element, transition); + if (GST_STATE_CHANGE_FAILURE == ret) { + return ret; + } + + GstHailoNet *self = GST_HAILONET(element); + std::unique_lock lock(self->sink_probe_change_state_mutex); + + switch (transition) { + case GST_STATE_CHANGE_PAUSED_TO_PLAYING: + { + auto status = gst_hailonet_configure(self); + if (HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } + break; + } + case GST_STATE_CHANGE_PLAYING_TO_PAUSED: + { + auto status = gst_hailonet_deconfigure(self); + if (HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } + break; + } + case GST_STATE_CHANGE_READY_TO_NULL: { - gboolean debug = g_value_get_boolean(value); - g_object_set(m_hailosend, "debug", debug, NULL); - g_object_set(m_hailorecv, "debug", debug, NULL); + auto status = gst_hailonet_free(self); + if (HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } break; } + default: + break; + } + + return ret; +} + +static hailo_status gst_hailonet_toggle_activation(GstHailoNet *self, gboolean old_is_active, gboolean new_is_active) +{ + std::unique_lock lock(self->infer_mutex); + + if (self->props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get())) { + g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); + return HAILO_INVALID_OPERATION; + } + + if (self->has_called_activate) { + // Should we keep this? If the user changes the is-active property when we are not configured, it's his fault. + if (!self->is_configured) { + g_warning("Trying to change is-active property when network is not configured!"); + return HAILO_INVALID_OPERATION; + } + if (old_is_active && !new_is_active) { + self->configured_infer_model->deactivate(); + } else if (!old_is_active && new_is_active) { + auto status = self->configured_infer_model->activate(); + CHECK_SUCCESS(status); + } else { + g_warning("Trying to change is-active property from %d to %d", old_is_active, new_is_active); + } + } + + self->props.m_is_active = new_is_active; + return HAILO_SUCCESS; +} + +static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +{ + GstHailoNet *self = GST_HAILONET(object); + switch (property_id) { + case PROP_HEF_PATH: + if (self->is_configured) { + g_warning("The network was already configured so changing the HEF path will not take place!"); + break; + } + if (nullptr != self->props.m_hef_path.get()) { + g_free(self->props.m_hef_path.get()); + } + self->props.m_hef_path = g_strdup(g_value_get_string(value)); + break; + case PROP_BATCH_SIZE: + if (self->is_configured) { + g_warning("The network was already configured so changing the batch size will not take place!"); + break; + } + self->props.m_batch_size = static_cast(g_value_get_uint(value)); + break; case PROP_DEVICE_ID: - if (0 != m_props.m_device_count.get()) { + if (0 != self->props.m_device_count.get()) { g_error("device-id and device-count excludes eachother. 
received device-id=%s, device-count=%d", - g_value_get_string(value), m_props.m_device_count.get()); + g_value_get_string(value), self->props.m_device_count.get()); break; } - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the device ID will not take place!"); break; } - if (nullptr != m_props.m_device_id.get()) { - g_free(m_props.m_device_id.get()); + if (nullptr != self->props.m_device_id.get()) { + g_free(self->props.m_device_id.get()); } - m_props.m_device_id = g_strdup(g_value_get_string(value)); + self->props.m_device_id = g_strdup(g_value_get_string(value)); break; case PROP_DEVICE_COUNT: - if (nullptr != m_props.m_device_id.get()) { + if (nullptr != self->props.m_device_id.get()) { g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", - m_props.m_device_id.get(), g_value_get_uint(value)); + self->props.m_device_id.get(), g_value_get_uint(value)); break; } - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the device count will not take place!"); break; } - m_props.m_device_count = static_cast(g_value_get_uint(value)); + self->props.m_device_count = static_cast(g_value_get_uint(value)); break; - case PROP_VDEVICE_KEY: - if (m_was_configured) { - g_warning("The network was already configured so changing the vdevice key will not take place!"); + case PROP_VDEVICE_GROUP_ID: + if (self->is_configured) { + g_warning("The network was already configured so changing the vdevice group ID will not take place!"); break; } - m_props.m_vdevice_key = static_cast(g_value_get_uint(value)); - break; - case PROP_HEF_PATH: - if (m_was_configured) { - g_warning("The network was already configured so changing the HEF path will not take place!"); - break; + if (nullptr != self->props.m_vdevice_group_id.get()) { + g_free(self->props.m_vdevice_group_id.get()); } - if (nullptr != m_props.m_hef_path.get()) { - g_free(m_props.m_hef_path.get()); - } - m_props.m_hef_path = g_strdup(g_value_get_string(value)); + self->props.m_vdevice_group_id = g_strdup(g_value_get_string(value)); break; - case PROP_NETWORK_NAME: - if (m_was_configured) { - g_warning("The network was already configured so changing the network name will not take place!"); - break; - } - if (nullptr != m_props.m_network_name.get()) { - g_free(m_props.m_network_name.get()); - } - m_props.m_network_name = g_strdup(g_value_get_string(value)); + case PROP_IS_ACTIVE: + (void)gst_hailonet_toggle_activation(self, self->props.m_is_active.get(), g_value_get_boolean(value)); break; - case PROP_BATCH_SIZE: - if (m_was_configured) { - g_warning("The network was already configured so changing the batch size will not take place!"); - break; - } - m_props.m_batch_size = static_cast(g_value_get_uint(value)); + case PROP_PASS_THROUGH: + self->props.m_pass_through = g_value_get_boolean(value); + break; + case PROP_FORCE_WRITABLE: + self->props.m_should_force_writable = g_value_get_boolean(value); break; case PROP_OUTPUTS_MIN_POOL_SIZE: - if (m_was_configured) { - g_warning("The network was already configured so changing the outputs minimum pool size will not take place!"); + if (self->is_configured) { + g_warning("The network has already been configured, the output's minimum pool size cannot be changed!"); break; } - g_object_set(m_hailorecv, "outputs-min-pool-size", g_value_get_uint(value), NULL); + self->props.m_outputs_min_pool_size = g_value_get_uint(value); break; case 
PROP_OUTPUTS_MAX_POOL_SIZE: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the outputs maximum pool size will not take place!"); break; } - g_object_set(m_hailorecv, "outputs-max-pool-size", g_value_get_uint(value), NULL); - break; - case PROP_IS_ACTIVE: - { - gboolean new_is_active = g_value_get_boolean(value); - - if (m_props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != m_props.m_scheduling_algorithm.get())) { - g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); - break; - } - - if (m_has_called_activate) { - if (m_props.m_is_active.get() && !new_is_active) { - // Setting this to false before deactivating to signal hailosend and hailorecv to stop inferring - m_props.m_is_active = false; - hailo_status status = deactivate_network_group(); - if (HAILO_SUCCESS != status) { - g_error("Deactivating network group failed, status = %d", status); - return; - } - } else if (!m_props.m_is_active.get() && new_is_active) { - hailo_status status = m_net_group_handle->activate_network_group(); - if (HAILO_SUCCESS != status) { - g_error("Failed activating network group, status = %d", status); - break; - } - m_props.m_is_active = true; - } else { - g_warning("Trying to change the is-active property state from %d to %d", m_props.m_is_active.get(), new_is_active); - break; - } - } else { - m_props.m_is_active = new_is_active; - } + self->props.m_outputs_max_pool_size = g_value_get_uint(value); break; - } case PROP_SCHEDULING_ALGORITHM: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); break; } - if (m_props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { + if (self->props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); break; } - m_props.m_scheduling_algorithm = static_cast<hailo_scheduling_algorithm_t>(g_value_get_enum(value)); + self->props.m_scheduling_algorithm = static_cast<hailo_scheduling_algorithm_t>(g_value_get_enum(value)); break; case PROP_SCHEDULER_TIMEOUT_MS: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the scheduling timeout will not take place!"); break; } - if (m_props.m_is_active.was_changed()) { - g_error("scheduler usage (scheduler-timeout-ms) in combination with 'is-active' is not supported."); - break; - } - m_props.m_scheduler_timeout_ms = g_value_get_uint(value); + self->props.m_scheduler_timeout_ms = g_value_get_uint(value); break; case PROP_SCHEDULER_THRESHOLD: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the scheduling threshold will not take place!"); break; } - if (m_props.m_is_active.was_changed()) { - g_error("scheduler usage (scheduler-threshold) in combination with 'is-active' is not supported."); - break; - } - m_props.m_scheduler_threshold = g_value_get_uint(value); + self->props.m_scheduler_threshold = g_value_get_uint(value); break; case PROP_SCHEDULER_PRIORITY: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the scheduling priority will not take place!"); break; } - if (m_props.m_is_active.was_changed()) { - g_error("scheduler usage 
(scheduler-priority) in combination with 'is-active' is not supported."); - break; - } - m_props.m_scheduler_priority = static_cast<guint8>(g_value_get_uint(value)); - break; - case PROP_MULTI_PROCESS_SERVICE: - if (m_was_configured) { - g_warning("The network was already configured so changing the multi-process-service property will not take place!"); - break; - } - m_props.m_multi_process_service = g_value_get_boolean(value); + self->props.m_scheduler_priority = static_cast<guint8>(g_value_get_uint(value)); break; case PROP_INPUT_FORMAT_TYPE: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the format type will not take place!"); break; } - m_props.m_input_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value)); + self->props.m_input_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value)); break; case PROP_OUTPUT_FORMAT_TYPE: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the format type will not take place!"); break; } - m_props.m_output_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value)); + self->props.m_output_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value)); break; case PROP_NMS_SCORE_THRESHOLD: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the score threshold will not take place!"); break; } - m_props.m_nms_score_threshold = static_cast<gfloat>(g_value_get_float(value)); + self->props.m_nms_score_threshold = static_cast<gfloat>(g_value_get_float(value)); break; case PROP_NMS_IOU_THRESHOLD: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the IoU threshold will not take place!"); break; } - m_props.m_nms_iou_threshold = static_cast<gfloat>(g_value_get_float(value)); + self->props.m_nms_iou_threshold = static_cast<gfloat>(g_value_get_float(value)); break; case PROP_NMS_MAX_PROPOSALS_PER_CLASS: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the max proposals per class will not take place!"); break; } - m_props.m_nms_max_proposals_per_class = static_cast<guint32>(g_value_get_uint(value)); + self->props.m_nms_max_proposals_per_class = static_cast<guint32>(g_value_get_uint(value)); + break; + case PROP_INPUT_FROM_META: + if (self->is_configured) { + g_warning("The network was already configured so changing the input method will not take place!"); + break; + } + self->props.m_input_from_meta = g_value_get_boolean(value); + break; + case PROP_NO_TRANSFORM: + if (self->is_configured) { + g_warning("The network was already configured so disabling the transformation will not take place!"); + break; + } + self->props.m_no_transform = g_value_get_boolean(value); + break; + case PROP_MULTI_PROCESS_SERVICE: + if (self->is_configured) { + g_warning("The network was already configured so changing the multi-process-service property will not take place!"); + break; + } + self->props.m_multi_process_service = g_value_get_boolean(value); + break; + + // Deprecated + case PROP_VDEVICE_KEY: + if (self->is_configured) { + g_warning("The network was already configured so changing the vdevice key will not take place!"); + break; + } + self->props.m_vdevice_key = static_cast<guint32>(g_value_get_uint(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); + break; + } +}
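The props fields above follow a small "tracked property" pattern: each property remembers whether it was explicitly set, which is what the was_changed() checks rely on. A minimal sketch of what such a wrapper could look like (the actual implementation lives in common.hpp and may differ in detail):

    // Sketch only: a value plus a "was it explicitly set?" flag.
    template <typename T>
    class TrackedProperty final {
    public:
        explicit TrackedProperty(T default_value) : m_value(default_value), m_was_changed(false) {}
        TrackedProperty &operator=(T value) { m_value = value; m_was_changed = true; return *this; }
        T get() const { return m_value; }
        bool was_changed() const { return m_was_changed; }
    private:
        T m_value;
        bool m_was_changed;
    };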
+ +static void gst_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) +{ + GstHailoNet *self = GST_HAILONET(object); + switch (property_id) { + case PROP_HEF_PATH: + g_value_set_string(value, self->props.m_hef_path.get()); + break; + case PROP_BATCH_SIZE: + g_value_set_uint(value, self->props.m_batch_size.get()); + break; + case PROP_DEVICE_ID: + g_value_set_string(value, self->props.m_device_id.get()); + break; + case PROP_DEVICE_COUNT: + g_value_set_uint(value, self->props.m_device_count.get()); + break; + case PROP_VDEVICE_GROUP_ID: + g_value_set_string(value, self->props.m_vdevice_group_id.get()); + break; + case PROP_IS_ACTIVE: + g_value_set_boolean(value, self->props.m_is_active.get()); + break; + case PROP_PASS_THROUGH: + g_value_set_boolean(value, self->props.m_pass_through.get()); + break; + case PROP_FORCE_WRITABLE: + g_value_set_boolean(value, self->props.m_should_force_writable.get()); + break; + case PROP_OUTPUTS_MIN_POOL_SIZE: + g_value_set_uint(value, self->props.m_outputs_min_pool_size.get()); + break; + case PROP_OUTPUTS_MAX_POOL_SIZE: + g_value_set_uint(value, self->props.m_outputs_max_pool_size.get()); + break; + case PROP_SCHEDULING_ALGORITHM: + g_value_set_enum(value, self->props.m_scheduling_algorithm.get()); + break; + case PROP_SCHEDULER_TIMEOUT_MS: + g_value_set_uint(value, self->props.m_scheduler_timeout_ms.get()); + break; + case PROP_SCHEDULER_THRESHOLD: + g_value_set_uint(value, self->props.m_scheduler_threshold.get()); + break; + case PROP_SCHEDULER_PRIORITY: + g_value_set_uint(value, self->props.m_scheduler_priority.get()); + break; + case PROP_INPUT_FORMAT_TYPE: + g_value_set_enum(value, self->props.m_input_format_type.get()); + break; + case PROP_OUTPUT_FORMAT_TYPE: + g_value_set_enum(value, self->props.m_output_format_type.get()); + break; + case PROP_NMS_SCORE_THRESHOLD: + g_value_set_float(value, self->props.m_nms_score_threshold.get()); + break; + case PROP_NMS_IOU_THRESHOLD: + g_value_set_float(value, self->props.m_nms_iou_threshold.get()); + break; + case PROP_NMS_MAX_PROPOSALS_PER_CLASS: + g_value_set_uint(value, self->props.m_nms_max_proposals_per_class.get()); + break; + case PROP_INPUT_FROM_META: + g_value_set_boolean(value, self->props.m_input_from_meta.get()); + break; + case PROP_NO_TRANSFORM: + g_value_set_boolean(value, self->props.m_no_transform.get()); + break; + case PROP_MULTI_PROCESS_SERVICE: + g_value_set_boolean(value, self->props.m_multi_process_service.get()); + break; + + // Deprecated + case PROP_VDEVICE_KEY: + g_value_set_uint(value, self->props.m_vdevice_key.get()); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); @@ -517,476 +711,763 @@ void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue } }
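For orientation before the class initializer below, typical element usage from an application looks like this (a hedged sketch; the HEF path and caps are placeholders that depend on the model):

    // Hypothetical pipeline; hailonet negotiates its sink caps from the HEF.
    GError *error = nullptr;
    GstElement *pipeline = gst_parse_launch(
        "videotestsrc ! video/x-raw,format=RGB,width=640,height=640 ! "
        "hailonet hef-path=/path/to/model.hef batch-size=1 ! fakesink",
        &error);
    if (nullptr == pipeline) {
        g_printerr("Failed to create pipeline: %s\n", error->message);
        g_clear_error(&error);
    }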
" + "When deactivating a hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " + "hailonet, since this operation waits until there are no frames coming in.", + PLUGIN_AUTHOR); + + gobject_class->set_property = gst_hailonet_set_property; + gobject_class->get_property = gst_hailonet_get_property; + g_object_class_install_property(gobject_class, PROP_HEF_PATH, + g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", nullptr, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, + g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", + MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, + g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", + 0, std::numeric_limits::max(), MIN_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, + g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", + "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), + MAX_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_DEVICE_ID, + g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, + g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, + std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_VDEVICE_GROUP_ID, + g_param_spec_string("vdevice-group-id", + "VDevice Group ID to share vdevices across hailonets", + "Used to share VDevices across different hailonet instances", HAILO_DEFAULT_VDEVICE_GROUP_ID, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // TODO (HRT-12306): Change is-active behavior + g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, + g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " + "By default, the hailonet element will not be active unless it is the only one. " + "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_PASS_THROUGH, + g_param_spec_boolean("pass-through", "Is element pass-through", "Controls whether the element will perform inference or simply pass buffers through. " + "By default, the hailonet element will not be pass-through. 
" + "Setting this property to true disables inference, regardless of the scheduler settings.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_FORCE_WRITABLE, + g_param_spec_boolean("force-writable", "Force writable", "Controls whether the element will force the input buffer to be writable. " + "We force the input to be writable with the function gst_buffer_make_writable, which in most cases will do a shallow copy of the buffer. " + "But in some cases (when the buffer is marked as not shared - see gst_buffer_copy documentation), it will do a deep copy." + "By default, the hailonet element will not force the input buffer to be writable and will raise an error when the buffer is read-only.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, + g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " + "Gets values from the enum GstHailoSchedulingAlgorithms. " + "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " + "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. ", + GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, + g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," + " as long as at least one send request has been sent.", + HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, + g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", + HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, + g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. " + "Bigger number represent higher priority", + HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, + g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. ", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, + g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. 
", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_INPUT_FROM_META, + g_param_spec_boolean("input-from-meta", "Enable input from meta", "Take network input from metadata instead of video frame.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_NO_TRANSFORM, + g_param_spec_boolean("no-transform", "Disable transformations", "Format will remain the same as the HW format.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, + g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, + g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " + "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", + HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // Deprecated + g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, + g_param_spec_uint("vdevice-key", + "Deprecated: Indicate whether to re-use or re-create vdevice", + "Deprecated: Use vdevice-group-id instead. Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet." \ + "if multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", + MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // See information about the "flush" signal in the element description + g_signal_new( + "flush", + GST_TYPE_HAILONET, + G_SIGNAL_ACTION, + 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 + ); +} + +static void gst_hailonet_push_buffer_to_thread(GstHailoNet *self, GstBuffer *buffer) { - GST_DEBUG_OBJECT(m_element, "get_property"); - - if ((object == nullptr) || (value == nullptr) || (pspec == nullptr)) { - g_error("get_property got null parameter!"); - return; + { + std::unique_lock lock(self->thread_queue_mutex); + self->thread_cv.wait(lock, [self] () { + bool is_unlimited_pool_not_empty = (self->props.m_outputs_max_pool_size.get() == 0) && (self->buffers_in_thread_queue < MAX_OUTPUTS_POOL_SIZE); + bool is_pool_empty = self->buffers_in_thread_queue < self->props.m_outputs_max_pool_size.get(); + return is_unlimited_pool_not_empty || is_pool_empty; + }); + gst_queue_array_push_tail(self->thread_queue, buffer); + self->buffers_in_thread_queue++; } + self->thread_cv.notify_all(); +} - switch (property_id) { - case PROP_DEBUG: - { - gboolean debug; - g_object_get(m_hailosend, "debug", &debug, nullptr); - g_value_set_boolean(value, debug); - break; +// TODO: This function should be refactored. It does many unrelated things and the user need to know that he should unmap the buffer +// in case of an error. AND it does not print errors nor return an indicative status (also the comments are confusing - "continue"?) 
- switch (property_id) { - case PROP_DEBUG: - { - gboolean debug; - g_object_get(m_hailosend, "debug", &debug, nullptr); - g_value_set_boolean(value, debug); - break; +// TODO: This function should be refactored. It does many unrelated things, and the caller needs to know to unmap the buffer +// in case of an error. It also does not print errors nor return an indicative status (and the comments are confusing - "continue"?) +static bool set_infos(GstParentBufferMeta *parent_buffer_meta, hailo_vstream_info_t &vstream_info, GstMapInfo &info) +{ + gboolean map_succeeded = gst_buffer_map(parent_buffer_meta->buffer, &info, GST_MAP_READ); + if (!map_succeeded) { + // Failed to map, this buffer might not have a GstHailoTensorMeta, continue + return false; } - case PROP_DEVICE_ID: - g_value_set_string(value, m_props.m_device_id.get()); - break; - case PROP_DEVICE_COUNT: - g_value_set_uint(value, m_props.m_device_count.get()); - break; - case PROP_VDEVICE_KEY: - g_value_set_uint(value, m_props.m_vdevice_key.get()); - break; - case PROP_HEF_PATH: - g_value_set_string(value, m_props.m_hef_path.get()); - break; - case PROP_NETWORK_NAME: - g_value_set_string(value, m_props.m_network_name.get()); - break; - case PROP_BATCH_SIZE: - g_value_set_uint(value, m_props.m_batch_size.get()); - break; - case PROP_OUTPUTS_MIN_POOL_SIZE: - { - guint outputs_min_pool_size; - g_object_get(m_hailorecv, "outputs-min-pool-size", &outputs_min_pool_size, nullptr); - g_value_set_uint(value, outputs_min_pool_size); - break; + GstHailoTensorMeta *tensor_meta = GST_TENSOR_META_GET(parent_buffer_meta->buffer); + if (!tensor_meta) { + // Not a tensor meta (this buffer is not a tensor), unmap and continue + gst_buffer_unmap(parent_buffer_meta->buffer, &info); + return false; } - case PROP_OUTPUTS_MAX_POOL_SIZE: - { - guint outputs_max_pool_size; - g_object_get(m_hailorecv, "outputs-max-pool-size", &outputs_max_pool_size, nullptr); - g_value_set_uint(value, outputs_max_pool_size); - break; + vstream_info = tensor_meta->info; + return true; +} + +static Expected<std::unordered_map<std::string, hailo_dma_buffer_t>> gst_hailonet_read_input_dma_buffers_from_meta(GstHailoNet *self, GstBuffer *buffer) +{ + std::unordered_map<std::string, hailo_dma_buffer_t> input_buffer_metas; + gpointer state = NULL; + GstMeta *meta; + + while ((meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) { + GstParentBufferMeta *parent_buffer_meta = reinterpret_cast<GstParentBufferMeta *>(meta); + GstMapInfo info; + hailo_vstream_info_t vstream_info; + bool result = set_infos(parent_buffer_meta, vstream_info, info); + if (result) { + CHECK_AS_EXPECTED(gst_is_dmabuf_memory(info.memory), HAILO_INTERNAL_FAILURE, "GstMemory is not a DMA buf as expected!"); + + int fd = gst_fd_memory_get_fd(info.memory); + CHECK_AS_EXPECTED(fd != -1, HAILO_INTERNAL_FAILURE, "Failed to get FD from GstMemory!"); + + hailo_dma_buffer_t dma_buffer = {fd, info.size}; + input_buffer_metas[vstream_info.name] = dma_buffer; + gst_buffer_unmap(parent_buffer_meta->buffer, &info); + } } - case PROP_IS_ACTIVE: - g_value_set_boolean(value, m_props.m_is_active.get()); - break; - case PROP_SCHEDULING_ALGORITHM: - g_value_set_enum(value, m_props.m_scheduling_algorithm.get()); - break; - case PROP_SCHEDULER_TIMEOUT_MS: - g_value_set_uint(value, m_props.m_scheduler_timeout_ms.get()); - break; - case PROP_SCHEDULER_THRESHOLD: - g_value_set_uint(value, m_props.m_scheduler_threshold.get()); - break; - case PROP_SCHEDULER_PRIORITY: - g_value_set_uint(value, m_props.m_scheduler_priority.get()); - break; - case PROP_MULTI_PROCESS_SERVICE: - g_value_set_boolean(value, m_props.m_multi_process_service.get()); - break; - case PROP_INPUT_FORMAT_TYPE: - g_value_set_enum(value, m_props.m_input_format_type.get()); - break; - case PROP_OUTPUT_FORMAT_TYPE: - g_value_set_enum(value, m_props.m_output_format_type.get()); - break; - case PROP_NMS_SCORE_THRESHOLD: - g_value_set_float(value, m_props.m_nms_score_threshold.get()); - break; - case PROP_NMS_IOU_THRESHOLD: - g_value_set_float(value, 
m_props.m_nms_iou_threshold.get()); - break; - case PROP_NMS_MAX_PROPOSALS_PER_CLASS: - g_value_set_uint(value, m_props.m_nms_max_proposals_per_class.get()); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); - break; + CHECK_AS_EXPECTED(!input_buffer_metas.empty(), HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!"); + + for (auto &input : self->infer_model->inputs()) { + CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(), + HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for input: %s", input.name().c_str()); } + + return input_buffer_metas; } -hailo_status HailoNetImpl::set_hef() +static hailo_status gst_hailonet_fill_multiple_input_bindings_dma_buffers(GstHailoNet *self, GstBuffer *buffer) { - m_net_group_handle = make_unique_nothrow<NetworkGroupHandle>(GST_ELEMENT(m_element)); - GST_CHECK(nullptr != m_net_group_handle, HAILO_OUT_OF_HOST_MEMORY, m_element, RESOURCE, "Failed allocating memory for network handle!"); - - hailo_status status = m_net_group_handle->set_hef(m_props.m_device_id.get(), m_props.m_device_count.get(), - m_props.m_vdevice_key.get(), m_props.m_scheduling_algorithm.get(), static_cast<bool>(m_props.m_multi_process_service.get()), - m_props.m_hef_path.get()); - if (HAILO_SUCCESS != status) { - return status; + auto input_buffers = gst_hailonet_read_input_dma_buffers_from_meta(self, buffer); + CHECK_EXPECTED_AS_STATUS(input_buffers); + for (const auto &name : self->infer_model->get_input_names()) + { + auto status = self->infer_bindings.input(name)->set_dma_buffer(input_buffers.value().at(name)); + CHECK_SUCCESS(status); } - if (m_props.m_multi_process_service.get()) { - GST_CHECK(m_props.m_scheduling_algorithm.get() != HAILO_SCHEDULING_ALGORITHM_NONE, - HAILO_INVALID_OPERATION, m_element, RESOURCE, "To use multi-process-service please set scheduling-algorithm."); + return HAILO_SUCCESS; +} + +static Expected<std::unordered_map<std::string, uint8_t *>> gst_hailonet_read_input_buffers_from_meta(GstHailoNet *self, GstBuffer *buffer) +{ + std::unordered_map<std::string, uint8_t *> input_buffer_metas; + gpointer state = NULL; + GstMeta *meta; + + while ((meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) { + GstParentBufferMeta *parent_buffer_meta = reinterpret_cast<GstParentBufferMeta *>(meta); + GstMapInfo info; + hailo_vstream_info_t vstream_info; + bool result = set_infos(parent_buffer_meta, vstream_info, info); + if (result) { + input_buffer_metas[vstream_info.name] = static_cast<uint8_t *>(info.data); + gst_buffer_unmap(parent_buffer_meta->buffer, &info); + } } + CHECK_AS_EXPECTED(!input_buffer_metas.empty(), HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!"); - if (nullptr == m_props.m_network_name.get()) { - // TODO: HRT-4957 - GST_CHECK(m_net_group_handle->hef()->get_network_groups_names().size() == 1, HAILO_INVALID_ARGUMENT, m_element, RESOURCE, - "Network group has to be specified when there is more than one network group in the HEF!"); - auto network_group_name = m_net_group_handle->hef()->get_network_groups_names()[0]; + for (auto &input : self->infer_model->inputs()) { + CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(), + HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for input: %s", input.name().c_str()); + } - auto networks_infos = m_net_group_handle->hef()->get_network_infos(network_group_name.c_str()); - GST_CHECK_EXPECTED_AS_STATUS(networks_infos, m_element, RESOURCE, "Getting network infos from network group name has failed, status %d", networks_infos.status()); - 
GST_CHECK(networks_infos.value().size() == 1, HAILO_INVALID_ARGUMENT, m_element, RESOURCE, - "Network has to be specified when there is more than one network in the network group!"); + return input_buffer_metas; +} - std::string default_ng_name = HailoRTDefaults::get_network_name(network_group_name); - m_props.m_network_name = g_strdup(default_ng_name.c_str()); +static hailo_status gst_hailonet_fill_multiple_input_bindings(GstHailoNet *self, GstBuffer *buffer) +{ + auto input_buffers = gst_hailonet_read_input_buffers_from_meta(self, buffer); + CHECK_EXPECTED_AS_STATUS(input_buffers); + for (const auto &name : self->infer_model->get_input_names()) { + auto status = self->infer_bindings.input(name)->set_buffer(MemoryView(input_buffers.value().at(name), + self->infer_model->input(name)->get_frame_size())); + CHECK_SUCCESS(status); } - auto input_vstream_infos = m_net_group_handle->hef()->get_input_vstream_infos(m_props.m_network_name.get()); - GST_CHECK_EXPECTED_AS_STATUS(input_vstream_infos, m_element, RESOURCE, "Getting input vstream infos from HEF has failed, status = %d", - input_vstream_infos.status()); + return HAILO_SUCCESS; +} - // TODO: HRT-4095 - GST_CHECK(1 == input_vstream_infos->size(), HAILO_INVALID_OPERATION, m_element, RESOURCE, "hailonet element supports only HEFs with one input for now!"); +static hailo_status gst_hailonet_push_buffer_to_input_queue(GstHailoNet *self, GstBuffer *buffer) +{ + std::unique_lock<std::mutex> lock(self->input_queue_mutex); + gst_queue_array_push_tail(self->input_queue, buffer); - auto input_vstream_info = input_vstream_infos.value()[0]; - GST_HAILOSEND(m_hailosend)->impl->set_input_vstream_infos(input_vstream_infos.release()); - GST_HAILOSEND(m_hailosend)->impl->set_batch_size(m_props.m_batch_size.get()); + return HAILO_SUCCESS; +}
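For the input-from-meta path, an upstream element is expected to attach each prepared tensor to the video buffer through a GstParentBufferMeta. A hypothetical producer-side sketch (tensor_data, tensor_size, frame_buf and input_vstream_info are illustrative names, not APIs introduced by this patch):

    // Attach a prepared input tensor to the frame buffer that flows into hailonet.
    GstBuffer *tensor_buf = gst_buffer_new_wrapped(tensor_data, tensor_size);
    GstHailoTensorMeta *meta = GST_TENSOR_META_ADD(tensor_buf);
    meta->info = input_vstream_info; // hailo_vstream_info_t whose name matches a network input
    gst_buffer_add_parent_buffer_meta(frame_buf, tensor_buf);
    gst_buffer_unref(tensor_buf); // the parent-buffer meta holds its own reference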
flow status = %d", flow_result); + } - auto frame_size = HailoRTCommon::get_frame_size(input_vstream_info, input_vstream_info.format); - gst_buffer_pool_config_set_params(config, nullptr, frame_size, 1, 1); + GstMapInfo buffer_info; + gboolean result = gst_buffer_map(output_buffer, &buffer_info, GST_MAP_WRITE); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!"); - gboolean result = gst_buffer_pool_set_config(pool, config); - GST_CHECK(result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Could not set config buffer pool"); + if (gst_hailo_should_use_dma_buffers()) { + CHECK_AS_EXPECTED(gst_is_dmabuf_memory(buffer_info.memory), HAILO_INTERNAL_FAILURE, "GstMemory is not a DMA buf as expected!"); - result = gst_buffer_pool_set_active(pool, TRUE); - GST_CHECK(result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Could not set buffer pool active"); + int fd = gst_fd_memory_get_fd(buffer_info.memory); + CHECK_AS_EXPECTED(fd != -1, HAILO_INTERNAL_FAILURE, "Failed to get FD from GstMemory!"); - m_pool = pool; + hailo_dma_buffer_t dma_buffer = {fd, buffer_info.size}; + auto status = self->infer_bindings.output(output.name())->set_dma_buffer(dma_buffer); + CHECK_SUCCESS_AS_EXPECTED(status); + } else { + auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size)); + CHECK_SUCCESS_AS_EXPECTED(status); + } - return HAILO_SUCCESS; + tensors[output.name()] = {output_buffer, buffer_info}; + } + return tensors; } -hailo_status HailoNetImpl::configure_network_group() +static hailo_status gst_hailonet_fill_single_input_binding(GstHailoNet *self, hailo_pix_buffer_t pix_buffer) { - std::unique_lock lock(m_mutex); - g_object_set(m_queue, "max-size-buffers", MAX_BUFFER_COUNT(m_props.m_batch_size.get()), NULL); + auto status = self->infer_bindings.input()->set_pix_buffer(pix_buffer); + CHECK_SUCCESS(status); - auto network_group_name = get_network_group_name(m_props.m_network_name.get()); - GST_CHECK_EXPECTED_AS_STATUS(network_group_name, m_element, RESOURCE, "Could not get network group name from name %s, status = %d", - m_props.m_network_name.get(), network_group_name.status()); + return HAILO_SUCCESS; +} - hailo_status status = m_net_group_handle->configure_network_group(network_group_name->c_str(), m_props.m_scheduling_algorithm.get(), m_props.m_batch_size.get()); - if (HAILO_SUCCESS != status) { - return status; - } - m_was_configured = true; +static hailo_status gst_hailonet_call_run_async(GstHailoNet *self, const std::unordered_map &tensors) +{ + auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT); + CHECK_SUCCESS(status); - if (m_props.m_scheduler_timeout_ms.was_changed()) { - status = m_net_group_handle->set_scheduler_timeout(m_props.m_network_name.get(), m_props.m_scheduler_timeout_ms.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler timeout failed, status = %d", status); - } - if (m_props.m_scheduler_threshold.was_changed()) { - status = m_net_group_handle->set_scheduler_threshold(m_props.m_network_name.get(), m_props.m_scheduler_threshold.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler threshold failed, status = %d", status); - } - if (m_props.m_scheduler_priority.was_changed()) { - status = m_net_group_handle->set_scheduler_priority(m_props.m_network_name.get(), m_props.m_scheduler_priority.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status); + { + 
std::unique_lock<std::mutex> lock(self->flush_mutex); + self->ongoing_frames++; } - auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats, - m_props.m_input_format_type.get(), m_props.m_output_format_type.get()); + auto job = self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) { + GstBuffer *buffer = nullptr; + { + std::unique_lock<std::mutex> lock(self->input_queue_mutex); + buffer = static_cast<GstBuffer *>(gst_queue_array_pop_head(self->input_queue)); + } - GST_CHECK_EXPECTED_AS_STATUS(vstreams, m_element, RESOURCE, "Creating vstreams failed, status = %d", status); + for (auto &output : self->infer_model->outputs()) { + auto info = tensors.at(output.name()); + gst_buffer_unmap(info.buffer, &info.buffer_info); - GST_HAILOSEND(m_hailosend)->impl->set_input_vstreams(std::move(vstreams->first)); + GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer); + buffer_meta->info = self->output_vstream_infos[output.name()]; - // Check that, if one of the NMS params is changed, we have NMS outputs in the model - auto has_nms_output = std::any_of(vstreams->second.begin(), vstreams->second.end(), [](const auto &vs) - { - return HailoRTCommon::is_nms(vs.get_info()); - }); + (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer); + gst_buffer_unref(info.buffer); } - for (auto &out_vs : vstreams->second) { - if (m_props.m_nms_score_threshold.was_changed()) { - GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS score threshold is set, but there is no NMS output in this model."); - if (HailoRTCommon::is_nms(out_vs.get_info())) { - status = out_vs.set_nms_score_threshold(m_props.m_nms_score_threshold.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS score threshold failed, status = %d", status); - } - } - if (m_props.m_nms_iou_threshold.was_changed()) { - GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS IoU threshold is set, but there is no NMS output in this model."); - if (HailoRTCommon::is_nms(out_vs.get_info())) { - status = out_vs.set_nms_iou_threshold(m_props.m_nms_iou_threshold.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS IoU threshold failed, status = %d", status); - } + + { + std::unique_lock<std::mutex> lock(self->flush_mutex); + self->ongoing_frames--; } - if (m_props.m_nms_max_proposals_per_class.was_changed()) { - GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS max proposals per class is set, but there is no NMS output in this model."); - if (HailoRTCommon::is_nms(out_vs.get_info())) { - status = out_vs.set_nms_max_proposals_per_class(m_props.m_nms_max_proposals_per_class.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS max proposals per class failed, status = %d", status); - } + self->flush_cv.notify_all(); - } - } - - status = GST_HAILORECV(m_hailorecv)->impl->set_output_vstreams(std::move(vstreams->second), m_props.m_batch_size.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting output vstreams failed, status = %d", status); + gst_hailonet_push_buffer_to_thread(self, buffer); + }); + CHECK_EXPECTED_AS_STATUS(job); + job->detach(); return HAILO_SUCCESS; }
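The round-trip above follows HailoRT's asynchronous inference pattern: wait until the model can accept a job, bind buffers, fire run_async() and let the completion callback hand results downstream. In outline (simplified from the code above; error handling shortened):

    hailo_status infer_once(ConfiguredInferModel &model, ConfiguredInferModel::Bindings &bindings)
    {
        // Block until the model can accept another async job.
        auto status = model.wait_for_async_ready(std::chrono::milliseconds(1000));
        if (HAILO_SUCCESS != status) {
            return status;
        }
        // Completion is reported through the callback; outputs are valid there.
        auto job = model.run_async(bindings, [](const AsyncInferCompletionInfo &info) {
            (void)info; // hand results downstream here
        });
        if (HAILO_SUCCESS != job.status()) {
            return job.status();
        }
        job->detach(); // don't block this thread on completion
        return HAILO_SUCCESS;
    }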
-hailo_status HailoNetImpl::activate_hailonet() +static hailo_status gst_hailonet_async_infer_multi_input(GstHailoNet *self, GstBuffer *buffer) { - if (HAILO_SCHEDULING_ALGORITHM_NONE != m_props.m_scheduling_algorithm.get()) { - m_props.m_is_active = true; - return HAILO_SUCCESS; + if (gst_hailo_should_use_dma_buffers()) { + auto status = gst_hailonet_fill_multiple_input_bindings_dma_buffers(self, buffer); + CHECK_SUCCESS(status); + } else { + auto status = gst_hailonet_fill_multiple_input_bindings(self, buffer); + CHECK_SUCCESS(status); } - if ((1 == m_hailonet_count) && (!m_props.m_is_active.was_changed())) { - m_props.m_is_active = true; - } + auto status = gst_hailonet_push_buffer_to_input_queue(self, buffer); + CHECK_SUCCESS(status); - if (m_props.m_is_active.get()) { - std::unique_lock<std::mutex> lock(m_mutex); - hailo_status status = m_net_group_handle->activate_network_group(); - if (HAILO_SUCCESS != status) { - return status; - } + auto tensors = gst_hailonet_fill_output_bindings(self); + if (HAILO_STREAM_ABORT == tensors.status()) { + return HAILO_SUCCESS; } + CHECK_EXPECTED_AS_STATUS(tensors); - m_has_called_activate = true; - + status = gst_hailonet_call_run_async(self, tensors.value()); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } -Expected<std::string> HailoNetImpl::get_network_group_name(const std::string &network_name) +static hailo_status gst_hailonet_async_infer_single_input(GstHailoNet *self, GstBuffer *buffer, hailo_pix_buffer_t pix_buffer) { - for (const auto &network_group_name : m_net_group_handle->hef()->get_network_groups_names()) { - // Look for network_group with the given name - if ((network_name == network_group_name) || (network_name == HailoRTDefaults::get_network_name(network_group_name))) { - return std::string(network_group_name); - } + auto status = gst_hailonet_fill_single_input_binding(self, pix_buffer); + CHECK_SUCCESS(status); - auto network_infos = m_net_group_handle->hef()->get_network_infos(network_group_name); - GST_CHECK_EXPECTED(network_infos, m_element, RESOURCE, "Could not get network infos of group %s, status = %d", network_group_name.c_str(), - network_infos.status()); + status = gst_hailonet_push_buffer_to_input_queue(self, buffer); + CHECK_SUCCESS(status); - // Look for network with the given name - for (const auto &network_info : network_infos.value()) { - if (network_name == network_info.name) { - return std::string(network_group_name); - } - } + auto tensors = gst_hailonet_fill_output_bindings(self); + if (HAILO_STREAM_ABORT == tensors.status()) { + return HAILO_SUCCESS; } + CHECK_EXPECTED_AS_STATUS(tensors); - GST_ELEMENT_ERROR(m_element, RESOURCE, FAILED, ("Failed to get network group name from the name %s!", network_name.c_str()), (NULL)); - return make_unexpected(HAILO_NOT_FOUND); -} - -hailo_status HailoNetImpl::link_elements() -{ - /* Link elements here because only here we have the HEF and the Caps format */ - if (!gst_element_link_many(m_hailosend, m_queue, m_hailorecv, NULL)) { - GST_ELEMENT_ERROR(m_element, RESOURCE, FAILED, ("Could not link elements in bin!"), (NULL)); - return HAILO_INTERNAL_FAILURE; - } + status = gst_hailonet_call_run_async(self, tensors.value()); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } -hailo_status HailoNetImpl::abort_streams() +static Expected<hailo_pix_buffer_t> gst_hailonet_construct_pix_buffer(GstHailoNet *self, GstBuffer *buffer) { - if (!m_props.m_is_active.get()) { - return HAILO_SUCCESS; + GstVideoFrame frame; + auto result = gst_video_frame_map(&frame, &self->input_frame_info, buffer, + static_cast<GstMapFlags>(GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "gst_video_frame_map failed!"); + + hailo_pix_buffer_t pix_buffer = {}; + pix_buffer.index = 0; + pix_buffer.number_of_planes = GST_VIDEO_INFO_N_PLANES(&frame.info); + 
pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; + + for (uint32_t plane_index = 0; plane_index < pix_buffer.number_of_planes; plane_index++) { + pix_buffer.planes[plane_index].bytes_used = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); + pix_buffer.planes[plane_index].plane_size = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); + pix_buffer.planes[plane_index].user_ptr = GST_VIDEO_FRAME_PLANE_DATA(&frame, plane_index); } - auto status = GST_HAILOSEND(m_hailosend)->impl->abort_vstreams(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed aborting input VStreams of hailosend, status = %d", status); - status = GST_HAILORECV(m_hailorecv)->impl->abort_vstreams(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed aborting output VStreams of hailorecv, status = %d", status); - return HAILO_SUCCESS; + gst_video_frame_unmap(&frame); + return pix_buffer; } -hailo_status HailoNetImpl::deactivate_network_group() +static GstFlowReturn gst_hailonet_chain(GstPad * /*pad*/, GstObject * parent, GstBuffer * buffer) { - auto was_deactivated = m_net_group_handle->remove_network_group(); - GST_CHECK_EXPECTED_AS_STATUS(was_deactivated, m_element, RESOURCE, "Failed removing network, status = %d", was_deactivated.status()); + GstHailoNet *self = GST_HAILONET(parent); + std::unique_lock<std::mutex> lock(self->infer_mutex); - if (was_deactivated.value()) { - return clear_vstreams(); + if (self->props.m_pass_through.get() || !self->props.m_is_active.get() || !self->is_configured) { + gst_hailonet_push_buffer_to_thread(self, buffer); + return GST_FLOW_OK; } - return HAILO_SUCCESS; -} -hailo_status HailoNetImpl::clear_vstreams() -{ - if (nullptr != GST_HAILOSEND(m_hailosend)->impl) { - hailo_status status = GST_HAILOSEND(m_hailosend)->impl->clear_vstreams(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed clearing input VStreams of hailosend, status = %d", status); + if (!gst_buffer_is_writable(buffer)) { + if (self->props.m_should_force_writable.get()) { + buffer = gst_buffer_make_writable(buffer); + if (nullptr == buffer) { + ERROR("Failed to make buffer writable!"); + return GST_FLOW_ERROR; + } + } else { + ERROR("Input buffer is not writable! 
Use the force-writable property to force the buffer to be writable"); + return GST_FLOW_ERROR; + } } - if (nullptr != GST_HAILORECV(m_hailorecv)->impl) { - hailo_status status = GST_HAILORECV(m_hailorecv)->impl->clear_vstreams(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed clearing output VStreams of hailorecv, status = %d", status); + if (self->props.m_input_from_meta.get()) { + auto status = gst_hailonet_async_infer_multi_input(self, buffer); + if (HAILO_SUCCESS != status) { + return GST_FLOW_ERROR; + } + } else { + auto pix_buffer = gst_hailonet_construct_pix_buffer(self, buffer); + if (!pix_buffer) { + return GST_FLOW_ERROR; + } + auto status = gst_hailonet_async_infer_single_input(self, buffer, pix_buffer.value()); + if (HAILO_SUCCESS != status) { + return GST_FLOW_ERROR; + } } - return HAILO_SUCCESS; + return GST_FLOW_OK; } -gboolean HailoNetImpl::src_pad_event(GstEvent *event) +static hailo_status gst_hailonet_init_infer_model(GstHailoNet *self) { - assert(nullptr != event); + auto vdevice_params = HailoRTDefaults::get_vdevice_params(); - auto parsed_event = HailoSetOutputFormatEvent::parse(event); - if (HAILO_SUCCESS != parsed_event.status()) { - return FALSE; + hailo_device_id_t device_id = {0}; + if (self->props.m_device_id.was_changed()) { + auto expected_device_id = HailoRTCommon::to_device_id(self->props.m_device_id.get()); + CHECK_EXPECTED_AS_STATUS(expected_device_id); + device_id = std::move(expected_device_id.release()); + + vdevice_params.device_ids = &device_id; + } + if (self->props.m_device_count.was_changed()) { + vdevice_params.device_count = self->props.m_device_count.get(); + } + std::string key_str; + if (self->props.m_vdevice_group_id.was_changed()) { + vdevice_params.group_id = self->props.m_vdevice_group_id.get(); + } else if (self->props.m_vdevice_key.was_changed()) { + key_str = std::to_string(self->props.m_vdevice_key.get()); + vdevice_params.group_id = key_str.c_str(); // key_str must outlive VDevice::create() below + } + if (self->props.m_scheduling_algorithm.was_changed()) { + vdevice_params.scheduling_algorithm = self->props.m_scheduling_algorithm.get(); + } + if (self->props.m_multi_process_service.was_changed()) { + vdevice_params.multi_process_service = self->props.m_multi_process_service.get(); + CHECK(self->props.m_scheduling_algorithm.get() != HAILO_SCHEDULING_ALGORITHM_NONE, HAILO_INVALID_OPERATION, + "To use multi-process-service please set scheduling-algorithm to a value other than 'none'"); + } + + auto vdevice = VDevice::create(vdevice_params); + CHECK_EXPECTED_AS_STATUS(vdevice); + self->vdevice = std::move(vdevice.release()); + + auto infer_model = self->vdevice->create_infer_model(self->props.m_hef_path.get()); + CHECK_EXPECTED_AS_STATUS(infer_model); + self->infer_model = infer_model.release(); + + if (!(self->props.m_input_from_meta.get())) { + CHECK(self->infer_model->inputs().size() == 1, HAILO_INVALID_OPERATION, + "In case you want to run a multiple-input model, please set the input-from-meta flag."); } - m_output_formats = std::move(parsed_event->formats); - return TRUE; + return HAILO_SUCCESS; } -GstPadProbeReturn HailoNetImpl::sink_probe() +static const gchar *gst_hailonet_get_format_string(const InferModel::InferStream &input) { - hailo_status status = activate_hailonet(); - GST_CHECK(HAILO_SUCCESS == status, GST_PAD_PROBE_REMOVE, m_element, RESOURCE, "Failed activating network, status = %d", status); - return GST_PAD_PROBE_REMOVE; + switch (input.format().order) { + case HAILO_FORMAT_ORDER_RGB4: + case HAILO_FORMAT_ORDER_NHWC: + if (input.shape().features == RGBA_FEATURES_SIZE) { + 
return "RGBA"; + } + if (input.shape().features == GRAY8_FEATURES_SIZE) { + return "GRAY8"; + } + /* Fallthrough */ + case HAILO_FORMAT_ORDER_NHCW: + case HAILO_FORMAT_ORDER_FCR: + case HAILO_FORMAT_ORDER_F8CR: + if (input.shape().features == GRAY8_FEATURES_SIZE) { + return "GRAY8"; + } + CHECK(RGB_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for RGB format! (features=%d)", input.name().c_str(), RGB_FEATURES_SIZE, + input.shape().features); + return "RGB"; + case HAILO_FORMAT_ORDER_YUY2: + CHECK(YUY2_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for YUY2 format! (features=%d)", input.name().c_str(), YUY2_FEATURES_SIZE, + input.shape().features); + return "YUY2"; + case HAILO_FORMAT_ORDER_NV12: + CHECK(NV12_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for NV12 format! (features=%d)", input.name().c_str(), NV12_FEATURES_SIZE, + input.shape().features); + return "NV12"; + case HAILO_FORMAT_ORDER_NV21: + CHECK(NV21_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for NV21 format! (features=%d)", input.name().c_str(), NV21_FEATURES_SIZE, + input.shape().features); + return "NV21"; + case HAILO_FORMAT_ORDER_I420: + CHECK(I420_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for I420 format! (features=%d)", input.name().c_str(), I420_FEATURES_SIZE, + input.shape().features); + return "I420"; + default: + ERROR("Input %s has an unsupported format order! order = %d\n", input.name().c_str(), input.format().order); + return nullptr; + } } -gboolean HailoNetImpl::is_active() +static uint32_t get_height_by_order(uint32_t original_height, hailo_format_order_t order) { - return m_props.m_is_active.get(); + switch (order) { + case HAILO_FORMAT_ORDER_NV12: + case HAILO_FORMAT_ORDER_NV21: + return original_height * 2; + default: + break; + } + return original_height; } -hailo_status HailoNetImpl::flush() +static GstCaps *gst_hailonet_get_caps(GstHailoNet *self) { - GstBuffer *buffer = nullptr; - GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(m_pool, &buffer, nullptr); - GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Acquire buffer failed!"); + if (self->did_critical_failure_happen) { + // Sometimes gst_hailonet_get_caps will get called again even after a critical failure happened and nullptr was returned + return nullptr; + } - GstHailoBufferFlagMeta *buffer_meta = GST_HAILO_BUFFER_FLAG_META_ADD(buffer); - buffer_meta->flag = BUFFER_FLAG_FLUSH; - GST_BUFFER_TIMESTAMP(buffer) = GST_HAILOSEND(m_hailosend)->impl->last_frame_pts(); + if (nullptr == self->vdevice) { + auto status = gst_hailonet_init_infer_model(self); + if (HAILO_SUCCESS != status) { + self->did_critical_failure_happen = true; + return nullptr; + } + } - GstPad *pad = gst_element_get_static_pad(m_hailosend, "src"); - flow_result = gst_pad_push(pad, buffer); - GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Pushing buffer to queue has failed!"); + // TODO (HRT-12491): check caps based on incoming metadata + if (self->props.m_input_from_meta.get()) { + GstCaps *new_caps = gst_caps_new_any(); + self->input_caps = new_caps; + return gst_caps_copy(new_caps); + } - hailo_status status = m_was_flushed_event->wait(WAIT_FOR_FLUSH_TIMEOUT_MS); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed waiting for flushed event, status = %d", status); + auto input = 
-hailo_status HailoNetImpl::flush() +static GstCaps *gst_hailonet_get_caps(GstHailoNet *self) { - GstBuffer *buffer = nullptr; - GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(m_pool, &buffer, nullptr); - GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Acquire buffer failed!"); + if (self->did_critical_failure_happen) { + // Sometimes gst_hailonet_get_caps will get called again even after a critical failure happened and nullptr was returned + return nullptr; + } - GstHailoBufferFlagMeta *buffer_meta = GST_HAILO_BUFFER_FLAG_META_ADD(buffer); - buffer_meta->flag = BUFFER_FLAG_FLUSH; - GST_BUFFER_TIMESTAMP(buffer) = GST_HAILOSEND(m_hailosend)->impl->last_frame_pts(); + if (nullptr == self->vdevice) { + auto status = gst_hailonet_init_infer_model(self); + if (HAILO_SUCCESS != status) { + self->did_critical_failure_happen = true; + return nullptr; + } + } - GstPad *pad = gst_element_get_static_pad(m_hailosend, "src"); - flow_result = gst_pad_push(pad, buffer); - GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Pushing buffer to queue has failed!"); + // TODO (HRT-12491): check caps based on incoming metadata + if (self->props.m_input_from_meta.get()) { + GstCaps *new_caps = gst_caps_new_any(); + self->input_caps = new_caps; + return gst_caps_copy(new_caps); + } - hailo_status status = m_was_flushed_event->wait(WAIT_FOR_FLUSH_TIMEOUT_MS); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed waiting for flushed event, status = %d", status); + auto input = self->infer_model->input(); + if (!input) { + ERROR("Getting input has failed with status = %d\n", input.status()); + return nullptr; + } - status = m_was_flushed_event->reset(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed resetting flushed event, status = %d", status); + const gchar *format = gst_hailonet_get_format_string(input.value()); + if (nullptr == format) { + return nullptr; + } - return HAILO_SUCCESS; -} + GstCaps *new_caps = gst_caps_new_simple("video/x-raw", + "format", G_TYPE_STRING, format, + "width", G_TYPE_INT, input->shape().width, + "height", G_TYPE_INT, get_height_by_order(input->shape().height, input->format().order), + nullptr); -hailo_status HailoNetImpl::signal_was_flushed_event() -{ - return m_was_flushed_event->signal(); + if (!gst_video_info_from_caps(&self->input_frame_info, new_caps)) { + ERROR("gst_video_info_from_caps failed\n"); + return nullptr; + } + + self->input_caps = new_caps; + return gst_caps_copy(new_caps); } -static void gst_hailonet_init(GstHailoNet *self) +static gboolean gst_hailonet_handle_sink_query(GstPad * pad, GstObject * parent, GstQuery * query) { - if (!do_versions_match(GST_ELEMENT(self))) { - return; + GstHailoNet *self = GST_HAILONET(parent); + switch (GST_QUERY_TYPE (query)) { + case GST_QUERY_CAPS: + { + GstCaps *caps = gst_hailonet_get_caps(self); + if (nullptr == caps) { + return FALSE; + } + gst_query_set_caps_result(query, caps); + gst_caps_unref(caps); + return TRUE; } - - auto hailonet_impl = HailoNetImpl::create(self); - if (!hailonet_impl) { - GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Creating hailonet implementation has failed! status = %d", hailonet_impl.status()), (NULL)); - return; + case GST_QUERY_ALLOCATION: + { + // We implement this to make sure buffers are contiguous in memory + gst_query_add_allocation_meta(query, GST_VIDEO_META_API_TYPE, NULL); + return gst_pad_query_default(pad, parent, query); + } + default: + return gst_pad_query_default(pad, parent, query); } - - self->impl = hailonet_impl.release(); } -static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +static gboolean gst_hailonet_handle_caps_event(GstHailoNet *self, GstCaps */*caps*/) { - GST_HAILONET(object)->impl->set_property(object, property_id, value, pspec); -} + if (nullptr == self->input_caps) { + return FALSE; + } -static void gst_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) -{ - GST_HAILONET(object)->impl->get_property(object, property_id, value, pspec); -} + GstCaps *caps_result = gst_pad_peer_query_caps(self->srcpad, self->input_caps); + if (gst_caps_is_empty(caps_result)) { + gst_caps_unref(caps_result); + return FALSE; + } -static gboolean gst_hailorecv_src_pad_event(GstPad */*pad*/, GstObject *parent, GstEvent *event) -{ - gboolean result = GST_HAILONET(GST_ELEMENT_PARENT(parent))->impl->src_pad_event(event); - if (result) { + if (gst_caps_is_any(caps_result)) { + gst_caps_unref(caps_result); return TRUE; } - GstBaseTransform *trans = GST_BASE_TRANSFORM_CAST(parent); - return GST_BASE_TRANSFORM_GET_CLASS(trans)->src_event(trans, event); + GstCaps *outcaps = gst_caps_fixate(caps_result); + gboolean res = gst_pad_set_caps(self->srcpad, outcaps); + gst_caps_unref(outcaps); + return res; } -static GstPadProbeReturn gst_hailonet_sink_probe(GstPad *pad, GstPadProbeInfo */*info*/, gpointer /*user_data*/) +static gboolean gst_hailonet_sink_event(GstPad *pad, GstObject *parent, GstEvent *event) { - return 
GST_HAILONET(GST_ELEMENT_PARENT(gst_pad_get_parent(pad)))->impl->sink_probe(); + GstHailoNet *self = GST_HAILONET(parent); + switch (GST_EVENT_TYPE(event)) { + case GST_EVENT_CAPS: + { + GstCaps *caps; + gst_event_parse_caps(event, &caps); + auto result = gst_hailonet_handle_caps_event(self, caps); + gst_event_unref(event); + return result; + } + case GST_EVENT_EOS: + self->has_got_eos = true; + return gst_pad_push_event(self->srcpad, event); + default: + return gst_pad_event_default(pad, parent, event); + } } -static GstStateChangeReturn gst_hailonet_change_state(GstElement *element, GstStateChange transition) +static GstPadProbeReturn gst_hailonet_sink_probe(GstPad */*pad*/, GstPadProbeInfo */*info*/, gpointer user_data) { - GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_hailonet_parent_class)->change_state(element, transition); - if (GST_STATE_CHANGE_FAILURE == ret) { - return ret; - } + GstHailoNet *self = static_cast<GstHailoNet *>(user_data); + std::unique_lock<std::mutex> lock(self->sink_probe_change_state_mutex); - auto &hailonet = GST_HAILONET(element)->impl; - switch (transition) { - case GST_STATE_CHANGE_NULL_TO_READY: - { - hailo_status status = hailonet->link_elements(); - GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Linking elements has failed, status = %d\n", status); - break; + auto status = gst_hailonet_configure(self); + if (HAILO_SUCCESS != status) { + return GST_PAD_PROBE_DROP; } - case GST_STATE_CHANGE_READY_TO_PAUSED: - { - hailo_status status = hailonet->configure_network_group(); - GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Configuring network group failed, status = %d\n", status); - break; + + status = gst_hailonet_allocate_infer_resources(self); + if (HAILO_SUCCESS != status) { + return GST_PAD_PROBE_DROP; } - case GST_STATE_CHANGE_PLAYING_TO_PAUSED: - { - hailo_status status = hailonet->abort_streams(); - GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Aborting streams has failed, status = %d\n", status); - break; + + if (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get()) { + self->props.m_is_active = true; + return GST_PAD_PROBE_REMOVE; } - case GST_STATE_CHANGE_READY_TO_NULL: - { - if (HAILO_SCHEDULING_ALGORITHM_NONE == hailonet->get_props().m_scheduling_algorithm.get()) { - auto status = hailonet->deactivate_network_group(); - GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Deactivating network group failed, status = %d\n", status); - } - // Cleanup all of hailonet memory - hailonet.reset(); - break; + if ((1 == hailonet_count) && (!self->props.m_is_active.was_changed())) { + self->props.m_is_active = true; } - default: - break; + + if (self->props.m_is_active.get()) { + status = self->configured_infer_model->activate(); + if (HAILO_SUCCESS != status) { + return GST_PAD_PROBE_DROP; + } } - return ret; + self->has_called_activate = true; + return GST_PAD_PROBE_REMOVE; } -static void gst_hailonet_flush_callback(GstHailoNet *hailonet, gpointer /*data*/) +static void gst_hailonet_flush_callback(GstHailoNet *self, gpointer /*data*/) { - (void)hailonet->impl->flush(); + std::unique_lock<std::mutex> lock(self->flush_mutex); + self->flush_cv.wait(lock, [self] () { + return 0 == self->ongoing_frames; + }); }
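Emitting the action signal from application code blocks until no frames are in flight, which is how the "flush" described in the element metadata is consumed (usage sketch; hailonet_element is whatever handle the application holds):

    // Block until hailonet has completed all in-flight frames.
    g_signal_emit_by_name(hailonet_element, "flush");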
of %s is overrun!", GST_ELEMENT_NAME(GST_ELEMENT_PARENT(queue))); + if (!do_versions_match(GST_ELEMENT(self))) { + return; } -} -static void gst_hailonet_inner_queue_underrun_callback(GstElement *queue, gpointer /*udata*/) -{ - if (GST_HAILONET(GST_ELEMENT_PARENT(queue))->impl->is_active()) { - GST_INFO("Inner queue of %s is underrun!", GST_ELEMENT_NAME(GST_ELEMENT_PARENT(queue))); + self->sinkpad = gst_pad_new_from_static_template(&sink_template, "sink"); + gst_pad_set_chain_function(self->sinkpad, gst_hailonet_chain); + gst_pad_set_query_function(self->sinkpad, gst_hailonet_handle_sink_query); + gst_pad_set_event_function(self->sinkpad, GST_DEBUG_FUNCPTR(gst_hailonet_sink_event)); + gst_element_add_pad(GST_ELEMENT (self), self->sinkpad); + gst_pad_add_probe(self->sinkpad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_hailonet_sink_probe), self, nullptr); + + self->srcpad = gst_pad_new_from_static_template(&src_template, "src"); + gst_element_add_pad(GST_ELEMENT (self), self->srcpad); + + self->input_caps = nullptr; + self->input_queue = nullptr; + self->thread_queue = nullptr; + self->is_thread_running = false; + self->has_got_eos = false; + self->buffers_in_thread_queue = 0; + self->props = HailoNetProperties(); + self->vdevice = nullptr; + self->is_configured = false; + self->has_called_activate = false; + self->ongoing_frames = 0; + self->did_critical_failure_happen = false; + + gchar *parent_name = gst_object_get_name(GST_OBJECT(self)); + gchar *name = g_strconcat(parent_name, ":hailo_allocator", NULL); + g_free(parent_name); + + if (gst_hailo_should_use_dma_buffers()) { + self->dma_allocator = gst_dmabuf_allocator_new(); + } else { + self->allocator = GST_HAILO_ALLOCATOR(g_object_new(GST_TYPE_HAILO_ALLOCATOR, "name", name, NULL)); + gst_object_ref_sink(self->allocator); + g_free(name); } -} \ No newline at end of file + + g_signal_connect(self, "flush", G_CALLBACK(gst_hailonet_flush_callback), nullptr); + + hailonet_count++; +} diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp index 8f151958..26244ebb 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or @@ -20,114 +20,164 @@ #ifndef _GST_HAILONET_HPP_ #define _GST_HAILONET_HPP_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#include +#pragma GCC diagnostic pop + +#include +#include + +#include "hailo/infer_model.hpp" #include "common.hpp" -#include "network_group_handle.hpp" -#include "hailo/expected.hpp" -#include "hailo/event.hpp" -#include +#include #include +#include +#include + +using namespace hailort; G_BEGIN_DECLS -#define GST_TYPE_HAILONET (gst_hailonet_get_type()) -#define GST_HAILONET(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_HAILONET,GstHailoNet)) -#define GST_HAILONET_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_HAILONET,GstHailoNetClass)) -#define GST_IS_HAILONET(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_HAILONET)) -#define GST_IS_HAILONET_CLASS(obj) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_HAILONET)) +#define GST_TYPE_HAILO_ALLOCATOR (gst_hailo_allocator_get_type()) +#define GST_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocator)) +#define GST_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocatorClass)) +#define GST_IS_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_HAILO_ALLOCATOR)) +#define GST_IS_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_HAILO_ALLOCATOR)) -class HailoNetImpl; -struct GstHailoNet +#define MIN_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE) +#define MAX_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE * 4) + +#define GST_HAILO_USE_DMA_BUFFER_ENV_VAR "GST_HAILO_USE_DMA_BUFFER" + +struct GstHailoAllocator { - GstBin parent; - std::unique_ptr impl; + GstAllocator parent; + std::unordered_map buffers; }; -struct GstHailoNetClass +struct GstHailoAllocatorClass { - GstBinClass parent; + GstAllocatorClass parent; }; +GType gst_hailo_allocator_get_type(void); + struct HailoNetProperties final { public: - HailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), - m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), - m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), - m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_format_type(HAILO_FORMAT_TYPE_AUTO), - m_output_format_type(HAILO_FORMAT_TYPE_AUTO), m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0) - + HailoNetProperties() : m_hef_path(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), + m_device_id(nullptr), m_device_count(0), m_vdevice_group_id(nullptr), m_is_active(false), m_pass_through(false), + m_outputs_min_pool_size(MIN_OUTPUTS_POOL_SIZE), m_outputs_max_pool_size(MAX_OUTPUTS_POOL_SIZE), + m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), + m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), + m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_output_format_type(HAILO_FORMAT_TYPE_AUTO), + m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0), 
m_input_from_meta(false), + m_no_transform(false), m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_should_force_writable(false), + m_vdevice_key(DEFAULT_VDEVICE_KEY) {} - HailoElemProperty m_device_id; + void free_strings() + { + if (m_hef_path.was_changed()) { + g_free(m_hef_path.get()); + } + if (m_device_id.was_changed()) { + g_free(m_device_id.get()); + } + if (m_vdevice_group_id.was_changed()) { + g_free(m_vdevice_group_id.get()); + } + } + HailoElemProperty m_hef_path; - HailoElemProperty m_network_name; // This property can be network group name or a network name HailoElemProperty m_batch_size; - HailoElemProperty m_is_active; + HailoElemProperty m_device_id; HailoElemProperty m_device_count; - HailoElemProperty m_vdevice_key; + HailoElemProperty m_vdevice_group_id; + HailoElemProperty m_is_active; + HailoElemProperty m_pass_through; + HailoElemProperty m_outputs_min_pool_size; + HailoElemProperty m_outputs_max_pool_size; HailoElemProperty m_scheduling_algorithm; HailoElemProperty m_scheduler_timeout_ms; HailoElemProperty m_scheduler_threshold; HailoElemProperty m_scheduler_priority; - HailoElemProperty m_multi_process_service; HailoElemProperty m_input_format_type; HailoElemProperty m_output_format_type; HailoElemProperty m_nms_score_threshold; HailoElemProperty m_nms_iou_threshold; HailoElemProperty m_nms_max_proposals_per_class; -}; + HailoElemProperty m_input_from_meta; + HailoElemProperty m_no_transform; + HailoElemProperty m_multi_process_service; + HailoElemProperty m_should_force_writable; -class HailoNetImpl final -{ -public: - static Expected> create(GstHailoNet *element); - HailoNetImpl(GstHailoNet *element, GstElement *hailosend, GstElement *queue, GstElement *hailorecv, EventPtr was_flushed_event); - ~HailoNetImpl(); - - void set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); - void get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec); - hailo_status set_hef(); - hailo_status link_elements(); - hailo_status configure_network_group(); - hailo_status activate_hailonet(); - hailo_status abort_streams(); - - gboolean src_pad_event(GstEvent *event); - GstPadProbeReturn sink_probe(); - gboolean is_active(); - hailo_status flush(); - hailo_status signal_was_flushed_event(); - - hailo_status deactivate_network_group(); - HailoNetProperties &get_props() { - return m_props; - } + // Deprecated + HailoElemProperty m_vdevice_key; +}; -private: - void init_ghost_sink(); - void init_ghost_src(); - Expected get_network_group_name(const std::string &network_name); - - hailo_status clear_vstreams(); - - static std::atomic_uint32_t m_hailonet_count; - static std::mutex m_mutex; - GstHailoNet *m_element; - HailoNetProperties m_props; - std::vector m_output_formats; - GstElement *m_hailosend; - GstElement *m_queue; - GstElement *m_hailorecv; - std::unique_ptr m_net_group_handle; - bool m_was_configured; - bool m_has_called_activate; - EventPtr m_was_flushed_event; - GstBufferPool *m_pool; +typedef struct _GstHailoNet { + GstElement element; + GstPad *sinkpad; + GstPad *srcpad; + GstQueueArray *input_queue; + GstQueueArray *thread_queue; + std::atomic_uint32_t buffers_in_thread_queue; + std::thread thread; + HailoNetProperties props; + GstCaps *input_caps; + std::atomic_bool is_thread_running; + std::atomic_bool has_got_eos; + std::mutex sink_probe_change_state_mutex; + bool did_critical_failure_happen; + + std::unique_ptr vdevice; + std::shared_ptr infer_model; + std::shared_ptr configured_infer_model; + 
ConfiguredInferModel::Bindings infer_bindings;
+    bool is_configured;
+    std::mutex infer_mutex;
+
+    bool has_called_activate;
+    std::atomic_uint32_t ongoing_frames;
+    std::condition_variable flush_cv;
+    std::mutex flush_mutex;
+
+    GstVideoInfo input_frame_info;
+
+    GstHailoAllocator *allocator;
+    GstAllocator *dma_allocator;
+    std::unordered_map<std::string, GstBufferPool*> output_buffer_pools;
+    std::unordered_map<std::string, hailo_vstream_info_t> output_vstream_infos;
+
+    std::mutex input_queue_mutex;
+    std::mutex thread_queue_mutex;
+    std::condition_variable thread_cv;
+} GstHailoNet;
+
+typedef struct _GstHailoNetClass {
+    GstElementClass parent_class;
+} GstHailoNetClass;
+
+struct TensorInfo {
+    GstBuffer *buffer;
+    GstMapInfo buffer_info;
+};

-GType gst_hailonet_get_type(void);
+#define GST_TYPE_HAILONET (gst_hailonet_get_type())
+#define GST_HAILONET(obj) \
+    (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_HAILONET,GstHailoNet))
+#define GST_HAILONET_CLASS(klass) \
+    (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_HAILONET,GstHailoNetClass))
+#define GST_IS_HAILONET(obj) \
+    (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_HAILONET))
+#define GST_IS_HAILONET_CLASS(klass) \
+    (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_HAILONET))
+
+GType gst_hailonet_get_type (void);

 G_END_DECLS

-#endif /* _GST_HAILONET_HPP_ */
+#endif /* _GST_HAILONET_HPP_ */
\ No newline at end of file
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp
deleted file mode 100644
index 69f87635..00000000
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp
+++ /dev/null
@@ -1,1344 +0,0 @@
-/*
- * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-#include "gsthailonet2.hpp"
-#include "metadata/tensor_meta.hpp"
-#include "hailo/buffer.hpp"
-#include "hailo/hailort_common.hpp"
-#include "hailo/hailort_defaults.hpp"
-
-#include
-#include
-
-#define WAIT_FOR_ASYNC_READY_TIMEOUT (std::chrono::milliseconds(10000))
-#define ERROR(msg, ...)
g_print(msg, ##__VA_ARGS__) - -enum -{ - PROP_0, - PROP_HEF_PATH, - PROP_BATCH_SIZE, - PROP_DEVICE_ID, - PROP_DEVICE_COUNT, - PROP_VDEVICE_GROUP_ID, - PROP_IS_ACTIVE, - PROP_OUTPUTS_MIN_POOL_SIZE, - PROP_OUTPUTS_MAX_POOL_SIZE, - PROP_SCHEDULING_ALGORITHM, - PROP_SCHEDULER_TIMEOUT_MS, - PROP_SCHEDULER_THRESHOLD, - PROP_SCHEDULER_PRIORITY, - PROP_INPUT_FORMAT_TYPE, - PROP_OUTPUT_FORMAT_TYPE, - PROP_NMS_SCORE_THRESHOLD, - PROP_NMS_IOU_THRESHOLD, - PROP_NMS_MAX_PROPOSALS_PER_CLASS, - PROP_INPUT_FROM_META, - PROP_NO_TRANSFORM, - PROP_MULTI_PROCESS_SERVICE, - PROP_PASS_THROUGH, - - // Deprecated - PROP_VDEVICE_KEY, -}; - -static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); -static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - -G_DEFINE_TYPE (GstHailoAllocator, gst_hailo_allocator, GST_TYPE_ALLOCATOR); -G_DEFINE_TYPE (GstHailoNet2, gst_hailonet2, GST_TYPE_ELEMENT); - -static std::atomic_uint32_t hailonet_count(0); - -static GstMemory *gst_hailo_allocator_alloc(GstAllocator* allocator, gsize size, GstAllocationParams* /*params*/) { - GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); - auto buffer = Buffer::create(size, BufferStorageParams::create_dma()); - if (!buffer) { - ERROR("Creating buffer for allocator has failed, status = %d\n", buffer.status()); - return nullptr; - } - - GstMemory *memory = gst_memory_new_wrapped(static_cast(0), buffer->data(), - buffer->size(), 0, buffer->size(), nullptr, nullptr); - if (nullptr == memory) { - ERROR("Creating new GstMemory for allocator has failed!\n"); - return nullptr; - } - - hailo_allocator->buffers[memory] = std::move(buffer.release()); - return memory; -} - -static void gst_hailo_allocator_free(GstAllocator* allocator, GstMemory *mem) { - GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); - hailo_allocator->buffers.erase(mem); -} - -static void gst_hailo_allocator_class_init(GstHailoAllocatorClass* klass) { - GstAllocatorClass* allocator_class = GST_ALLOCATOR_CLASS(klass); - - allocator_class->alloc = gst_hailo_allocator_alloc; - allocator_class->free = gst_hailo_allocator_free; -} - -static void gst_hailo_allocator_init(GstHailoAllocator* allocator) { - allocator->buffers = std::unordered_map(); -} - -static hailo_status gst_hailonet2_deconfigure(GstHailoNet2 *self) -{ - // This will wakeup any blocking calls to deuque - for (auto &name_pool_pair : self->output_buffer_pools) { - gst_buffer_pool_set_flushing(name_pool_pair.second, TRUE); - } - - std::unique_lock lock(self->infer_mutex); - self->configured_infer_model.reset(); - self->is_configured = false; - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_free(GstHailoNet2 *self) -{ - std::unique_lock lock(self->infer_mutex); - self->configured_infer_model.reset(); - self->infer_model.reset(); - self->vdevice.reset(); - - { - std::unique_lock lock(self->thread_queue_mutex); - self->is_thread_running = false; - } - self->thread_cv.notify_all(); - - if (self->thread.joinable()) { - self->thread.join(); - } - - if (nullptr != self->input_queue) { - gst_queue_array_free(self->input_queue); - } - - if (nullptr != self->thread_queue) { - gst_queue_array_free(self->thread_queue); - } - - if (nullptr != self->input_caps) { - gst_caps_unref(self->input_caps); - } - - for (auto &name_pool_pair : self->output_buffer_pools) { - gboolean result = gst_buffer_pool_set_active(name_pool_pair.second, FALSE); - 
CHECK(result, HAILO_INTERNAL_FAILURE, "Could not release buffer pool"); - gst_object_unref(name_pool_pair.second); - } - - gst_object_unref(self->allocator); - - self->props.free_strings(); - - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_set_format_types(GstHailoNet2 *self, std::shared_ptr infer_model) -{ - if (self->props.m_input_format_type.was_changed()) { - for (const auto &input_name : infer_model->get_input_names()) { - auto input = infer_model->input(input_name); - CHECK_EXPECTED_AS_STATUS(input); - - input->set_format_type(self->props.m_input_format_type.get()); - } - } - if (self->props.m_output_format_type.was_changed()) { - for (const auto &output_name : infer_model->get_output_names()) { - auto output = infer_model->output(output_name); - CHECK_EXPECTED_AS_STATUS(output); - - output->set_format_type(self->props.m_output_format_type.get()); - } - } - - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_set_nms_params(GstHailoNet2 *self, std::shared_ptr infer_model) -{ - // Check that if one of the NMS params are changed, we have NMS outputs in the model - auto has_nms_output = std::any_of(infer_model->outputs().begin(), infer_model->outputs().end(), [](const auto &output) - { - return output.is_nms(); - }); - - for (const auto &output_name : infer_model->get_output_names()) { - auto output = infer_model->output(output_name); - CHECK_EXPECTED_AS_STATUS(output); - - if (self->props.m_nms_score_threshold.was_changed()) { - CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS score threshold is set, but there is no NMS output in this model."); - if (output->is_nms()) { - output->set_nms_score_threshold(self->props.m_nms_score_threshold.get()); - } - } - if (self->props.m_nms_iou_threshold.was_changed()) { - CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS IoU threshold is set, but there is no NMS output in this model."); - if (output->is_nms()) { - output->set_nms_iou_threshold(self->props.m_nms_iou_threshold.get()); - } - } - if (self->props.m_nms_max_proposals_per_class.was_changed()) { - CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS max proposals per class is set, but there is no NMS output in this model."); - if (output->is_nms()) { - output->set_nms_max_proposals_per_class(self->props.m_nms_max_proposals_per_class.get()); - } - } - } - - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_set_scheduler_params(GstHailoNet2 *self, std::shared_ptr configured_infer_model) -{ - if (self->props.m_scheduler_timeout_ms.was_changed()) { - auto millis = std::chrono::milliseconds(self->props.m_scheduler_timeout_ms.get()); - auto status = configured_infer_model->set_scheduler_timeout(millis); - CHECK_SUCCESS(status, "Setting scheduler timeout failed, status = %d", status); - } - if (self->props.m_scheduler_threshold.was_changed()) { - auto status = configured_infer_model->set_scheduler_threshold(self->props.m_scheduler_threshold.get()); - CHECK_SUCCESS(status, "Setting scheduler threshold failed, status = %d", status); - } - if (self->props.m_scheduler_priority.was_changed()) { - auto status = configured_infer_model->set_scheduler_priority(self->props.m_scheduler_priority.get()); - CHECK_SUCCESS(status, "Setting scheduler priority failed, status = %d", status); - } - - return HAILO_SUCCESS; -} - -static Expected gst_hailonet2_create_buffer_pool(GstHailoNet2 *self, size_t frame_size) -{ - GstBufferPool *pool = gst_buffer_pool_new(); - - GstStructure *config = gst_buffer_pool_get_config(pool); - gst_buffer_pool_config_set_params(config, nullptr, 
static_cast(frame_size), self->props.m_outputs_min_pool_size.get(), - self->props.m_outputs_max_pool_size.get()); - - gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->allocator), nullptr); - - gboolean result = gst_buffer_pool_set_config(pool, config); - CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set config buffer pool"); - - result = gst_buffer_pool_set_active(pool, TRUE); - CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set buffer pool as active"); - - return pool; -} - -static hailo_status gst_hailonet2_configure(GstHailoNet2 *self) -{ - if (self->is_configured) { - return HAILO_SUCCESS; - } - - for (auto &name_pool_pair : self->output_buffer_pools) { - gst_buffer_pool_set_flushing(name_pool_pair.second, FALSE); - } - - self->infer_model->set_batch_size(self->props.m_batch_size.get()); - - auto status = gst_hailonet2_set_format_types(self, self->infer_model); - CHECK_SUCCESS(status); - - status = gst_hailonet2_set_nms_params(self, self->infer_model); - CHECK_SUCCESS(status); - - // In RGB formats, Gstreamer is padding each row to 4. - for (const auto &input_name : self->infer_model->get_input_names()) { - if(self->props.m_no_transform.get()) { - // In case transformation is disabled - format order will be the same as we get from the HW (stream info). - auto input_stream_infos = self->infer_model->hef().get_stream_info_by_name(input_name, HAILO_H2D_STREAM); - CHECK_EXPECTED_AS_STATUS(input_stream_infos); - self->infer_model->input(input_name)->set_format_order(input_stream_infos.value().format.order); - } else if (self->infer_model->input(input_name)->format().order == HAILO_FORMAT_ORDER_NHWC) { - self->infer_model->input(input_name)->set_format_order(HAILO_FORMAT_ORDER_RGB4); - } - } - - if (self->props.m_no_transform.get()) { - for (const auto &output_name : self->infer_model->get_output_names()) { - // In case transformation is disabled - format order will be the same as we get from the HW (stream info). 
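For reference, the no-transform handling spelled out in the comment above (and applied to the outputs just below) boils down to one pattern; here it is as a standalone sketch. The helper name keep_hw_format_order is hypothetical, and only HailoRT calls already used in this file are assumed:

static hailo_status keep_hw_format_order(std::shared_ptr<InferModel> infer_model, const std::string &output_name)
{
    // Read the device-to-host (D2H) stream info recorded in the HEF for this output...
    auto stream_info = infer_model->hef().get_stream_info_by_name(output_name, HAILO_D2H_STREAM);
    CHECK_EXPECTED_AS_STATUS(stream_info);
    // ...and pin the output's format order to the HW order, so no host-side transformation runs.
    infer_model->output(output_name)->set_format_order(stream_info.value().format.order);
    return HAILO_SUCCESS;
}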
- auto output_stream_infos = self->infer_model->hef().get_stream_info_by_name(output_name, HAILO_D2H_STREAM); - CHECK_EXPECTED_AS_STATUS(output_stream_infos); - self->infer_model->output(output_name)->set_format_order(output_stream_infos.value().format.order); - } - } - - auto configured_infer_model = self->infer_model->configure(); - CHECK_EXPECTED_AS_STATUS(configured_infer_model); - - auto ptr = make_shared_nothrow(configured_infer_model.release()); - CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); - self->configured_infer_model = ptr; - - status = gst_hailonet2_set_scheduler_params(self, self->configured_infer_model); - CHECK_SUCCESS(status); - - self->is_configured = true; - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_allocate_infer_resources(GstHailoNet2 *self) -{ - auto bindings = self->configured_infer_model->create_bindings(); - CHECK_EXPECTED_AS_STATUS(bindings); - self->infer_bindings = std::move(bindings.release()); - - self->output_buffer_pools = std::unordered_map(); - self->output_vstream_infos = std::unordered_map(); - - auto async_queue_size = self->configured_infer_model->get_async_queue_size(); - CHECK_EXPECTED_AS_STATUS(async_queue_size); - self->input_queue = gst_queue_array_new(static_cast(async_queue_size.value())); - self->thread_queue = gst_queue_array_new(static_cast(async_queue_size.value())); - self->is_thread_running = true; - self->thread = std::thread([self] () { - while (self->is_thread_running) { - GstBuffer *buffer = nullptr; - { - std::unique_lock lock(self->thread_queue_mutex); - self->thread_cv.wait(lock, [self] () { - return (self->buffers_in_thread_queue > 0) || !self->is_thread_running; - }); - if (!self->is_thread_running) { - break; - } - - buffer = static_cast(gst_queue_array_pop_head(self->thread_queue)); - self->buffers_in_thread_queue--; - } - self->thread_cv.notify_all(); - if (GST_IS_PAD(self->srcpad)) { // Checking because we fail here when exiting the application - GstFlowReturn ret = gst_pad_push(self->srcpad, buffer); - if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && (!self->has_got_eos)) { - ERROR("gst_pad_push failed with status = %d\n", ret); - break; - } - } - } - }); - - for (auto &output : self->infer_model->outputs()) { - auto buffer_pool = gst_hailonet2_create_buffer_pool(self, output.get_frame_size()); - CHECK_EXPECTED_AS_STATUS(buffer_pool); - - self->output_buffer_pools[output.name()] = buffer_pool.release(); - } - - auto vstream_infos = self->infer_model->hef().get_output_vstream_infos(); - CHECK_EXPECTED_AS_STATUS(vstream_infos); - - for (const auto &vstream_info : vstream_infos.value()) { - self->output_vstream_infos[vstream_info.name] = vstream_info; - } - - return HAILO_SUCCESS; -} - -static GstStateChangeReturn gst_hailonet2_change_state(GstElement *element, GstStateChange transition) -{ - GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_hailonet2_parent_class)->change_state(element, transition); - if (GST_STATE_CHANGE_FAILURE == ret) { - return ret; - } - - GstHailoNet2 *self = GST_HAILONET2(element); - switch (transition) { - case GST_STATE_CHANGE_PAUSED_TO_PLAYING: - { - auto status = gst_hailonet2_configure(self); - if (HAILO_SUCCESS != status) { - return GST_STATE_CHANGE_FAILURE; - } - break; - } - case GST_STATE_CHANGE_PLAYING_TO_PAUSED: - { - auto status = gst_hailonet2_deconfigure(self); - if (HAILO_SUCCESS != status) { - return GST_STATE_CHANGE_FAILURE; - } - break; - } - case GST_STATE_CHANGE_READY_TO_NULL: - { - auto status = gst_hailonet2_free(self); - if (HAILO_SUCCESS != status) { 
- return GST_STATE_CHANGE_FAILURE; - } - break; - } - default: - break; - } - - return ret; -} - -static hailo_status gst_hailonet2_toggle_activation(GstHailoNet2 *self, gboolean old_is_active, gboolean new_is_active) -{ - if (self->props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get())) { - g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); - return HAILO_INVALID_OPERATION; - } - - if (self->has_called_activate) { - if (old_is_active && !new_is_active) { - self->configured_infer_model->deactivate(); - } else if (!old_is_active && new_is_active) { - auto status = self->configured_infer_model->activate(); - CHECK_SUCCESS(status); - } else { - g_warning("Trying to change is-active property from %d to %d", old_is_active, new_is_active); - } - } - - self->props.m_is_active = new_is_active; - return HAILO_SUCCESS; -} - -static void gst_hailonet2_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) -{ - GstHailoNet2 *self = GST_HAILONET2(object); - switch (property_id) { - case PROP_HEF_PATH: - if (self->is_configured) { - g_warning("The network was already configured so changing the HEF path will not take place!"); - break; - } - if (nullptr != self->props.m_hef_path.get()) { - g_free(self->props.m_hef_path.get()); - } - self->props.m_hef_path = g_strdup(g_value_get_string(value)); - break; - case PROP_BATCH_SIZE: - if (self->is_configured) { - g_warning("The network was already configured so changing the batch size will not take place!"); - break; - } - self->props.m_batch_size = static_cast(g_value_get_uint(value)); - break; - case PROP_DEVICE_ID: - if (0 != self->props.m_device_count.get()) { - g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", - g_value_get_string(value), self->props.m_device_count.get()); - break; - } - if (self->is_configured) { - g_warning("The network was already configured so changing the device ID will not take place!"); - break; - } - if (nullptr != self->props.m_device_id.get()) { - g_free(self->props.m_device_id.get()); - } - self->props.m_device_id = g_strdup(g_value_get_string(value)); - break; - case PROP_DEVICE_COUNT: - if (nullptr != self->props.m_device_id.get()) { - g_error("device-id and device-count excludes eachother. 
received device-id=%s, device-count=%d", - self->props.m_device_id.get(), g_value_get_uint(value)); - break; - } - if (self->is_configured) { - g_warning("The network was already configured so changing the device count will not take place!"); - break; - } - self->props.m_device_count = static_cast(g_value_get_uint(value)); - break; - case PROP_VDEVICE_GROUP_ID: - if (self->is_configured) { - g_warning("The network was already configured so changing the vdevice group ID will not take place!"); - break; - } - if (nullptr != self->props.m_vdevice_group_id.get()) { - g_free(self->props.m_vdevice_group_id.get()); - } - self->props.m_vdevice_group_id = g_strdup(g_value_get_string(value)); - break; - case PROP_IS_ACTIVE: - (void)gst_hailonet2_toggle_activation(self, self->props.m_is_active.get(), g_value_get_boolean(value)); - break; - case PROP_PASS_THROUGH: - self->props.m_pass_through = g_value_get_boolean(value); - break; - case PROP_OUTPUTS_MIN_POOL_SIZE: - if (self->is_configured) { - g_warning("The network has already been configured, the output's minimum pool size cannot be changed!"); - break; - } - self->props.m_outputs_min_pool_size = g_value_get_uint(value); - break; - case PROP_OUTPUTS_MAX_POOL_SIZE: - if (self->is_configured) { - g_warning("The network was already configured so changing the outputs maximum pool size will not take place!"); - break; - } - self->props.m_outputs_max_pool_size = g_value_get_uint(value); - break; - case PROP_SCHEDULING_ALGORITHM: - if (self->is_configured) { - g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); - break; - } - if (self->props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { - g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); - break; - } - self->props.m_scheduling_algorithm = static_cast(g_value_get_enum(value)); - break; - case PROP_SCHEDULER_TIMEOUT_MS: - if (self->is_configured) { - g_warning("The network was already configured so changing the scheduling timeout will not take place!"); - break; - } - self->props.m_scheduler_timeout_ms = g_value_get_uint(value); - break; - case PROP_SCHEDULER_THRESHOLD: - if (self->is_configured) { - g_warning("The network was already configured so changing the scheduling threshold will not take place!"); - break; - } - self->props.m_scheduler_threshold = g_value_get_uint(value); - break; - case PROP_SCHEDULER_PRIORITY: - if (self->is_configured) { - g_warning("The network was already configured so changing the scheduling priority will not take place!"); - break; - } - self->props.m_scheduler_priority = static_cast(g_value_get_uint(value)); - break; - case PROP_INPUT_FORMAT_TYPE: - if (self->is_configured) { - g_warning("The network was already configured so changing the format type will not take place!"); - break; - } - self->props.m_input_format_type = static_cast(g_value_get_enum(value)); - break; - case PROP_OUTPUT_FORMAT_TYPE: - if (self->is_configured) { - g_warning("The network was already configured so changing the format type will not take place!"); - break; - } - self->props.m_output_format_type = static_cast(g_value_get_enum(value)); - break; - case PROP_NMS_SCORE_THRESHOLD: - if (self->is_configured) { - g_warning("The network was already configured so changing the score threshold will not take place!"); - break; - } - self->props.m_nms_score_threshold = static_cast(g_value_get_float(value)); - break; - case 
PROP_NMS_IOU_THRESHOLD: - if (self->is_configured) { - g_warning("The network was already configured so changing the IoU threshold will not take place!"); - break; - } - self->props.m_nms_iou_threshold = static_cast(g_value_get_float(value)); - break; - case PROP_NMS_MAX_PROPOSALS_PER_CLASS: - if (self->is_configured) { - g_warning("The network was already configured so changing the max proposals per class will not take place!"); - break; - } - self->props.m_nms_max_proposals_per_class = static_cast(g_value_get_uint(value)); - break; - case PROP_INPUT_FROM_META: - if (self->is_configured) { - g_warning("The network was already configured so changing the input method will not take place!"); - break; - } - self->props.m_input_from_meta = g_value_get_boolean(value); - break; - case PROP_NO_TRANSFORM: - if (self->is_configured) { - g_warning("The network was already configured so disabling the transformation will not take place!"); - } - self->props.m_no_transform = g_value_get_boolean(value); - break; - case PROP_MULTI_PROCESS_SERVICE: - if (self->is_configured) { - g_warning("The network was already configured so changing the multi-process-service property will not take place!"); - break; - } - self->props.m_multi_process_service = g_value_get_boolean(value); // TODO: do something with this - break; - - // Deprecated - case PROP_VDEVICE_KEY: - if (self->is_configured) { - g_warning("The network was already configured so changing the vdevice key will not take place!"); - break; - } - self->props.m_vdevice_key = static_cast(g_value_get_uint(value)); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); - break; - } -} - -static void gst_hailonet2_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) -{ - GstHailoNet2 *self = GST_HAILONET2(object); - switch (property_id) { - case PROP_HEF_PATH: - g_value_set_string(value, self->props.m_hef_path.get()); - break; - case PROP_BATCH_SIZE: - g_value_set_uint(value, self->props.m_batch_size.get()); - break; - case PROP_DEVICE_ID: - g_value_set_string(value, self->props.m_device_id.get()); - break; - case PROP_DEVICE_COUNT: - g_value_set_uint(value, self->props.m_device_count.get()); - break; - case PROP_VDEVICE_GROUP_ID: - g_value_set_string(value, self->props.m_vdevice_group_id.get()); - break; - case PROP_IS_ACTIVE: - g_value_set_boolean(value, self->props.m_is_active.get()); - break; - case PROP_PASS_THROUGH: - g_value_set_boolean(value, self->props.m_pass_through.get()); - break; - case PROP_OUTPUTS_MIN_POOL_SIZE: - g_value_set_uint(value, self->props.m_outputs_min_pool_size.get()); - break; - case PROP_OUTPUTS_MAX_POOL_SIZE: - g_value_set_uint(value, self->props.m_outputs_max_pool_size.get()); - break; - case PROP_SCHEDULING_ALGORITHM: - g_value_set_enum(value, self->props.m_scheduling_algorithm.get()); - break; - case PROP_SCHEDULER_TIMEOUT_MS: - g_value_set_uint(value, self->props.m_scheduler_timeout_ms.get()); - break; - case PROP_SCHEDULER_THRESHOLD: - g_value_set_uint(value, self->props.m_scheduler_threshold.get()); - break; - case PROP_SCHEDULER_PRIORITY: - g_value_set_uint(value, self->props.m_scheduler_priority.get()); - break; - case PROP_INPUT_FORMAT_TYPE: - g_value_set_enum(value, self->props.m_input_format_type.get()); - break; - case PROP_OUTPUT_FORMAT_TYPE: - g_value_set_enum(value, self->props.m_output_format_type.get()); - break; - case PROP_NMS_SCORE_THRESHOLD: - g_value_set_float(value, self->props.m_nms_score_threshold.get()); - break; - case PROP_NMS_IOU_THRESHOLD: - 
g_value_set_float(value, self->props.m_nms_iou_threshold.get()); - break; - case PROP_NMS_MAX_PROPOSALS_PER_CLASS: - g_value_set_uint(value, self->props.m_nms_max_proposals_per_class.get()); - break; - case PROP_INPUT_FROM_META: - g_value_set_boolean(value, self->props.m_input_from_meta.get()); - break; - case PROP_NO_TRANSFORM: - g_value_set_boolean(value, self->props.m_no_transform.get()); - break; - case PROP_MULTI_PROCESS_SERVICE: - g_value_set_boolean(value, self->props.m_multi_process_service.get()); - break; - - // Deprecated - case PROP_VDEVICE_KEY: - g_value_set_uint(value, self->props.m_vdevice_key.get()); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); - break; - } -} - -static void gst_hailonet2_class_init(GstHailoNet2Class *klass) -{ - GObjectClass *gobject_class = G_OBJECT_CLASS(klass); - GstElementClass *element_class = GST_ELEMENT_CLASS(klass); - - gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&sink_template)); - gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template)); - element_class->change_state = gst_hailonet2_change_state; - - gst_element_class_set_static_metadata(element_class, - "hailonet element", "Hailo/Network", - "Configure and Activate Hailo Network. " - "Supports the \"flush\" signal which blocks until there are no buffers currently processesd in the element. " - "When deactivating a hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " - "hailonet, since this operation waits until there are no frames coming in.", - PLUGIN_AUTHOR); - - gobject_class->set_property = gst_hailonet2_set_property; - gobject_class->get_property = gst_hailonet2_get_property; - g_object_class_install_property(gobject_class, PROP_HEF_PATH, - g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", nullptr, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, - g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", - MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, - g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", - 0, std::numeric_limits::max(), MIN_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, - g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", - "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), - MAX_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_DEVICE_ID, - g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, - g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. 
Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, - std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_VDEVICE_GROUP_ID, - g_param_spec_string("vdevice-group-id", - "VDevice Group ID to share vdevices across hailonets", - "Used to share VDevices across different hailonet instances", HAILO_DEFAULT_VDEVICE_GROUP_ID, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - // TODO (HRT-12306): Change is-active behavior - g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, - g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " - "By default, the hailonet element will not be active unless it is the only one. " - "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_PASS_THROUGH, - g_param_spec_boolean("pass-through", "Is Element pass-through", "Controls whether the element will perform inference or simply pass buffers through. " - "By default, the hailonet element will not be pass-through. " - "Setting this property to true disables inference, regardless of the scheduler settings.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, - g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " - "Gets values from the enum GstHailoSchedulingAlgorithms. " - "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " - "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. ", - GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, - g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," - " as long as at least one send request has been sent.", - HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, - g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", - HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, - g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. 
" - "Bigger number represent higher priority", - HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, - g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." - "Gets values from the enum GstHailoFormatType. ", - GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, - g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." - "Gets values from the enum GstHailoFormatType. ", - GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_INPUT_FROM_META, - g_param_spec_boolean("input-from-meta", "Enable input from meta", "Take network input from metadata instead of video frame.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_NO_TRANSFORM, - g_param_spec_boolean("no-transform", "Disable transformations", "Format will remain the same as the HW format.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, - g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, - g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " - "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", - HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - // Deprecated - g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, - g_param_spec_uint("vdevice-key", - "Deprecated: Indicate whether to re-use or re-create vdevice", - "Deprecated: Use vdevice-group-id instead. Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet." 
\ - "if multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", - MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - // See information about the "flush" signal in the element description - g_signal_new( - "flush", - GST_TYPE_HAILONET2, - G_SIGNAL_ACTION, - 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 - ); -} - -static void gst_hailonet2_push_buffer_to_thread(GstHailoNet2 *self, GstBuffer *buffer) -{ - { - std::unique_lock lock(self->thread_queue_mutex); - self->thread_cv.wait(lock, [self] () { - return self->buffers_in_thread_queue < self->props.m_outputs_max_pool_size.get(); - }); - gst_queue_array_push_tail(self->thread_queue, buffer); - self->buffers_in_thread_queue++; - } - self->thread_cv.notify_all(); -} - -// TODO (HRT-12490): reduce code duplication with gst_hailonet2_async_infer -static hailo_status gst_hailonet2_async_infer_multi_input(GstHailoNet2 *self, GstBuffer *buffer, const std::unordered_map &input_buffers) -{ - { - std::unique_lock lock(self->input_queue_mutex); - for (auto name : self->infer_model->get_input_names()) - { - auto status = self->infer_bindings.input(name)->set_buffer(MemoryView(input_buffers.at(name), - self->infer_model->input(name)->get_frame_size())); - CHECK_SUCCESS(status); - } - - gst_queue_array_push_tail(self->input_queue, buffer); - } - - struct TensorInfo { - GstBuffer *buffer; - GstMapInfo buffer_info; - }; - std::unordered_map tensors; - for (auto &output : self->infer_model->outputs()) { - GstBuffer *output_buffer = nullptr; - GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(self->output_buffer_pools[output.name()], &output_buffer, nullptr); - if (GST_FLOW_FLUSHING == flow_result) { - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, "Acquire buffer failed!"); - - GstMapInfo buffer_info; - gboolean result = gst_buffer_map(output_buffer, &buffer_info, GST_MAP_WRITE); - CHECK(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!"); - - auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size)); - CHECK_SUCCESS(status); - - tensors[output.name()] = {output_buffer, buffer_info}; - } - - auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT); - CHECK_SUCCESS(status); - - auto job = self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) { - GstBuffer *buffer = nullptr; - { - std::unique_lock lock(self->input_queue_mutex); - buffer = static_cast(gst_queue_array_pop_head(self->input_queue)); - } - - for (auto &output : self->infer_model->outputs()) { - auto info = tensors.at(output.name()); - gst_buffer_unmap(info.buffer, &info.buffer_info); - - GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer); - buffer_meta->info = self->output_vstream_infos[output.name()]; - - (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer); - gst_buffer_unref(info.buffer); - } - - { - std::unique_lock lock(self->flush_mutex); - self->ongoing_frames--; - } - self->flush_cv.notify_all(); - - { - std::unique_lock lock(self->thread_queue_mutex); - gst_queue_array_push_tail(self->thread_queue, buffer); - self->buffers_in_thread_queue++; - } - self->thread_cv.notify_all(); - }); - CHECK_EXPECTED_AS_STATUS(job); - job->detach(); - - return HAILO_SUCCESS; -} - -static 
hailo_status gst_hailonet2_async_infer(GstHailoNet2 *self, GstBuffer * buffer, hailo_pix_buffer_t pix_buffer) -{ - { - std::unique_lock lock(self->input_queue_mutex); - auto status = self->infer_bindings.input()->set_pix_buffer(pix_buffer); - CHECK_SUCCESS(status); - - gst_queue_array_push_tail(self->input_queue, buffer); - } - - struct TensorInfo { - GstBuffer *buffer; - GstMapInfo buffer_info; - }; - std::unordered_map tensors; - for (auto &output : self->infer_model->outputs()) { - GstBuffer *output_buffer = nullptr; - GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(self->output_buffer_pools[output.name()], &output_buffer, nullptr); - if (GST_FLOW_FLUSHING == flow_result) { - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, "Acquire buffer failed!"); - - GstMapInfo buffer_info; - gboolean result = gst_buffer_map(output_buffer, &buffer_info, GST_MAP_WRITE); - CHECK(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!"); - - auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size)); - CHECK_SUCCESS(status); - - tensors[output.name()] = {output_buffer, buffer_info}; - } - - auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT); - CHECK_SUCCESS(status); - - auto job = self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) { - GstBuffer *buffer = nullptr; - { - std::unique_lock lock(self->input_queue_mutex); - buffer = static_cast(gst_queue_array_pop_head(self->input_queue)); - } - - for (auto &output : self->infer_model->outputs()) { - auto info = tensors.at(output.name()); - gst_buffer_unmap(info.buffer, &info.buffer_info); - - GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer); - buffer_meta->info = self->output_vstream_infos[output.name()]; - - (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer); - gst_buffer_unref(info.buffer); - } - - { - std::unique_lock lock(self->flush_mutex); - self->ongoing_frames--; - } - self->flush_cv.notify_all(); - - gst_hailonet2_push_buffer_to_thread(self, buffer); - }); - CHECK_EXPECTED_AS_STATUS(job); - job->detach(); - - return HAILO_SUCCESS; -} - -static Expected gst_hailonet2_construct_pix_buffer(GstHailoNet2 *self, GstBuffer *buffer) -{ - GstVideoFrame frame; - auto result = gst_video_frame_map(&frame, &self->input_frame_info, buffer, - static_cast(GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)); - CHECK_AS_EXPECTED(result,HAILO_INTERNAL_FAILURE, "gst_video_frame_map failed!"); - - hailo_pix_buffer_t pix_buffer = {}; - pix_buffer.index = 0; - pix_buffer.number_of_planes = GST_VIDEO_INFO_N_PLANES(&frame.info); - - for (uint32_t plane_index = 0; plane_index < pix_buffer.number_of_planes; plane_index++) { - pix_buffer.planes[plane_index].bytes_used = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); - pix_buffer.planes[plane_index].plane_size = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); - pix_buffer.planes[plane_index].user_ptr = GST_VIDEO_FRAME_PLANE_DATA(&frame, plane_index); - } - - gst_video_frame_unmap(&frame); - return pix_buffer; -} - -static Expected> gst_hailonet2_read_input_buffers_from_meta(GstHailoNet2 *self, GstBuffer *buffer) -{ - std::unordered_map input_buffer_metas; - gpointer state = NULL; - GstMeta *meta; - - while ((meta = 
gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) { - GstParentBufferMeta *parent_buffer_meta = reinterpret_cast(meta); - GstMapInfo info; - gboolean map_succeeded = gst_buffer_map(parent_buffer_meta->buffer, &info, GST_MAP_READ); - if (!map_succeeded) { - // Failed to map, this buffer might not have a GstHailoTensorMeta, continue - continue; - } - GstHailoTensorMeta *tensor_meta = GST_TENSOR_META_GET(parent_buffer_meta->buffer); - if (!tensor_meta) { - // Not a tensor meta (this buffer is not a tensor), unmap and continue - gst_buffer_unmap(parent_buffer_meta->buffer, &info); - continue; - } - const hailo_vstream_info_t vstream_info = tensor_meta->info; - input_buffer_metas[vstream_info.name] = static_cast(info.data); - gst_buffer_unmap(parent_buffer_meta->buffer, &info); - } - CHECK_AS_EXPECTED(!input_buffer_metas.empty(),HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!"); - - for (auto &input : self->infer_model->inputs()) { - CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(), - HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for output: %s", input.name().c_str()); - } - - return input_buffer_metas; -} - -static GstFlowReturn gst_hailonet2_chain(GstPad * /*pad*/, GstObject * parent, GstBuffer * buffer) -{ - GstHailoNet2 *self = GST_HAILONET2(parent); - std::unique_lock lock(self->infer_mutex); - - if (self->props.m_pass_through.get() || !self->props.m_is_active.get()) { - gst_hailonet2_push_buffer_to_thread(self, buffer); - return GST_FLOW_OK; - } - - { - std::unique_lock lock(self->flush_mutex); - self->ongoing_frames++; - } - - if (self->props.m_input_from_meta.get()) { - auto input_buffer_metas = gst_hailonet2_read_input_buffers_from_meta(self, buffer); - if (!input_buffer_metas) { - return GST_FLOW_ERROR; - } - auto status = gst_hailonet2_async_infer_multi_input(self, buffer, input_buffer_metas.value()); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { - return GST_FLOW_ERROR; - } - } else { - auto pix_buffer = gst_hailonet2_construct_pix_buffer(self, buffer); - if (!pix_buffer) { - return GST_FLOW_ERROR; - } - auto status = gst_hailonet2_async_infer(self, buffer, pix_buffer.value()); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { - return GST_FLOW_ERROR; - } - } - - return GST_FLOW_OK; -} - -static hailo_status gst_hailonet2_init_infer_model(GstHailoNet2 * self) -{ - auto vdevice_params = HailoRTDefaults::get_vdevice_params(); - - hailo_device_id_t device_id = {0}; - if (self->props.m_device_id.was_changed()) { - auto expected_device_id = HailoRTCommon::to_device_id(self->props.m_device_id.get()); - CHECK_EXPECTED_AS_STATUS(expected_device_id); - device_id = std::move(expected_device_id.release()); - - vdevice_params.device_ids = &device_id; - } - if (self->props.m_device_count.was_changed()) { - vdevice_params.device_count = self->props.m_device_count.get(); - } - if (self->props.m_vdevice_group_id.was_changed()) { - vdevice_params.group_id = self->props.m_vdevice_group_id.get(); - } else if (self->props.m_vdevice_key.was_changed()) { - auto key_str = std::to_string(self->props.m_vdevice_key.get()); - vdevice_params.group_id = key_str.c_str(); - } - if (self->props.m_scheduling_algorithm.was_changed()) { - vdevice_params.scheduling_algorithm = self->props.m_scheduling_algorithm.get(); - } - - auto vdevice = VDevice::create(vdevice_params); - CHECK_EXPECTED_AS_STATUS(vdevice); - self->vdevice = 
std::move(vdevice.release()); - - auto infer_model = self->vdevice->create_infer_model(self->props.m_hef_path.get()); - CHECK_EXPECTED_AS_STATUS(infer_model); - self->infer_model = infer_model.release(); - - return HAILO_SUCCESS; -} - -static const gchar *gst_hailonet2_get_format_string(const InferModel::InferStream &input) -{ - switch (input.format().order) { - case HAILO_FORMAT_ORDER_RGB4: - case HAILO_FORMAT_ORDER_NHWC: - if (input.shape().features == RGBA_FEATURES_SIZE) { - return "RGBA"; - } - if (input.shape().features == GRAY8_FEATURES_SIZE) { - return "GRAY8"; - } - /* Fallthrough */ - case HAILO_FORMAT_ORDER_NHCW: - case HAILO_FORMAT_ORDER_FCR: - case HAILO_FORMAT_ORDER_F8CR: - if (input.shape().features == GRAY8_FEATURES_SIZE) { - return "GRAY8"; - } - CHECK(RGB_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for RGB format! (features=%d)", input.name().c_str(), RGB_FEATURES_SIZE, - input.shape().features); - return "RGB"; - case HAILO_FORMAT_ORDER_YUY2: - CHECK(YUY2_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for YUY2 format! (features=%d)", input.name().c_str(), YUY2_FEATURES_SIZE, - input.shape().features); - return "YUY2"; - case HAILO_FORMAT_ORDER_NV12: - CHECK(NV12_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for NV12 format! (features=%d)", input.name().c_str(), NV12_FEATURES_SIZE, - input.shape().features); - return "NV12"; - case HAILO_FORMAT_ORDER_NV21: - CHECK(NV21_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for NV21 format! (features=%d)", input.name().c_str(), NV21_FEATURES_SIZE, - input.shape().features); - return "NV21"; - case HAILO_FORMAT_ORDER_I420: - CHECK(I420_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for I420 format! (features=%d)", input.name().c_str(), I420_FEATURES_SIZE, - input.shape().features); - return "I420"; - default: - ERROR("Input %s has an unsupported format order! 
order = %d\n", input.name().c_str(), input.format().order); - return nullptr; - } -} - -static uint32_t get_height_by_order(uint32_t original_height, hailo_format_order_t order) -{ - switch (order) { - case HAILO_FORMAT_ORDER_NV12: - case HAILO_FORMAT_ORDER_NV21: - return original_height * 2; - default: - break; - } - return original_height; -} - -static GstCaps *gst_hailonet2_get_caps(GstHailoNet2 *self) -{ - if (nullptr == self->vdevice) { - auto status = gst_hailonet2_init_infer_model(self); - if (HAILO_SUCCESS != status) { - return nullptr; - } - } - - // TODO (HRT-12491): check caps based on incoming metadata - if (self->props.m_input_from_meta.get()) { - GstCaps *new_caps = gst_caps_new_any(); - self->input_caps = new_caps; - return gst_caps_copy(new_caps); - } - - auto input = self->infer_model->input(); - if (!input) { - ERROR("Getting input has failed\n"); - return nullptr; - } - - const gchar *format = gst_hailonet2_get_format_string(input.value()); - if (nullptr == format) { - return nullptr; - } - - GstCaps *new_caps = gst_caps_new_simple("video/x-raw", - "format", G_TYPE_STRING, format, - "width", G_TYPE_INT, input->shape().width, - "height", G_TYPE_INT, get_height_by_order(input->shape().height, input->format().order), - nullptr); - - if (!gst_video_info_from_caps(&self->input_frame_info, new_caps)) { - ERROR("gst_video_info_from_caps failed\n"); - return nullptr; - } - - self->input_caps = new_caps; - return gst_caps_copy(new_caps); -} - -static gboolean gst_hailonet2_handle_sink_query(GstPad * pad, GstObject * parent, GstQuery * query) -{ - GstHailoNet2 *self = GST_HAILONET2(parent); - switch (GST_QUERY_TYPE (query)) { - case GST_QUERY_CAPS: - { - GstCaps *caps = gst_hailonet2_get_caps(self); - gst_query_set_caps_result(query, caps); - gst_caps_unref(caps); - return TRUE; - } - case GST_QUERY_ALLOCATION: - { - // We implement this to make sure buffers are contiguous in memory - gst_query_add_allocation_meta(query, GST_VIDEO_META_API_TYPE, NULL); - return gst_pad_query_default(pad, parent, query); - } - default: - return gst_pad_query_default(pad, parent, query); - } -} - -static gboolean gst_hailonet2_handle_caps_event(GstHailoNet2 *self, GstCaps */*caps*/) -{ - if (nullptr == self->input_caps) { - return FALSE; - } - - GstCaps *caps_result = gst_pad_peer_query_caps(self->srcpad, self->input_caps); - if (gst_caps_is_empty(caps_result)) { - return FALSE; - } - - if (gst_caps_is_any(caps_result)) { - gst_caps_unref(caps_result); - return TRUE; - } - - GstCaps *outcaps = gst_caps_fixate(caps_result); - gboolean res = gst_pad_set_caps(self->srcpad, outcaps); - gst_caps_unref(outcaps); - return res; -} - -static gboolean gst_hailonet2_sink_event(GstPad *pad, GstObject *parent, GstEvent *event) -{ - GstHailoNet2 *self = GST_HAILONET2(parent); - switch (GST_EVENT_TYPE(event)) { - case GST_EVENT_CAPS: - { - GstCaps *caps; - gst_event_parse_caps(event, &caps); - auto result = gst_hailonet2_handle_caps_event(self, caps); - gst_event_unref(event); - return result; - } - case GST_EVENT_EOS: - self->has_got_eos = true; - return gst_pad_push_event(self->srcpad, event); - default: - return gst_pad_event_default(pad, parent, event); - } -} - -static GstPadProbeReturn gst_hailonet2_sink_probe(GstPad */*pad*/, GstPadProbeInfo */*info*/, gpointer user_data) -{ - GstHailoNet2 *self = static_cast(user_data); - auto status = gst_hailonet2_configure(self); - if (HAILO_SUCCESS != status) { - return GST_PAD_PROBE_DROP; - } - - status = gst_hailonet2_allocate_infer_resources(self); - if 
(HAILO_SUCCESS != status) { - return GST_PAD_PROBE_DROP; - } - - if (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get()) { - self->props.m_is_active = true; - return GST_PAD_PROBE_REMOVE; - } - - if ((1 == hailonet_count) && (!self->props.m_is_active.was_changed())) { - self->props.m_is_active = true; - } - - if (self->props.m_is_active.get()) { - status = self->configured_infer_model->activate(); - if (HAILO_SUCCESS != status) { - return GST_PAD_PROBE_DROP; - } - } - - self->has_called_activate = true; - return GST_PAD_PROBE_REMOVE; -} - -static void gst_hailonet2_flush_callback(GstHailoNet2 *self, gpointer /*data*/) -{ - std::unique_lock lock(self->flush_mutex); - self->flush_cv.wait(lock, [self] () { - return 0 == self->ongoing_frames; - }); -} - -static void gst_hailonet2_init(GstHailoNet2 *self) -{ - if (!do_versions_match(GST_ELEMENT(self))) { - return; - } - - self->sinkpad = gst_pad_new_from_static_template(&sink_template, "sink"); - gst_pad_set_chain_function(self->sinkpad, gst_hailonet2_chain); - gst_pad_set_query_function(self->sinkpad, gst_hailonet2_handle_sink_query); - gst_pad_set_event_function(self->sinkpad, GST_DEBUG_FUNCPTR(gst_hailonet2_sink_event)); - gst_element_add_pad(GST_ELEMENT (self), self->sinkpad); - gst_pad_add_probe(self->sinkpad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_hailonet2_sink_probe), self, nullptr); - - self->srcpad = gst_pad_new_from_static_template(&src_template, "src"); - gst_element_add_pad(GST_ELEMENT (self), self->srcpad); - - self->input_caps = nullptr; - self->input_queue = nullptr; - self->thread_queue = nullptr; - self->is_thread_running = false; - self->has_got_eos = false; - self->buffers_in_thread_queue = 0; - self->props = HailoNet2Properties(); - self->vdevice = nullptr; - self->is_configured = false; - self->has_called_activate = false; - self->ongoing_frames = 0; - - gchar *parent_name = gst_object_get_name(GST_OBJECT(self)); - gchar *name = g_strconcat(parent_name, ":hailo_allocator", NULL); - g_free(parent_name); - - self->allocator = GST_HAILO_ALLOCATOR(g_object_new(GST_TYPE_HAILO_ALLOCATOR, "name", name, NULL)); - gst_object_ref_sink(self->allocator); - g_free(name); - - g_signal_connect(self, "flush", G_CALLBACK(gst_hailonet2_flush_callback), nullptr); - - hailonet_count++; -} diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp deleted file mode 100644 index 119e7a86..00000000 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- */ -#ifndef _GST_HAILONET2_HPP_ -#define _GST_HAILONET2_HPP_ - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#include -#pragma GCC diagnostic pop - -#include -#include - -#include "hailo/infer_model.hpp" -#include "common.hpp" - -#include -#include -#include -#include - -using namespace hailort; - -G_BEGIN_DECLS - -#define GST_TYPE_HAILO_ALLOCATOR (gst_hailo_allocator_get_type()) -#define GST_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocator)) -#define GST_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocatorClass)) -#define GST_IS_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_HAILO_ALLOCATOR)) -#define GST_IS_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_HAILO_ALLOCATOR)) - -#define MIN_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE) -#define MAX_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE * 4) - -struct GstHailoAllocator -{ - GstAllocator parent; - std::unordered_map buffers; -}; - -struct GstHailoAllocatorClass -{ - GstAllocatorClass parent; -}; - -GType gst_hailo_allocator_get_type(void); - -struct HailoNet2Properties final -{ -public: - HailoNet2Properties() : m_hef_path(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), - m_device_id(nullptr), m_device_count(0), m_vdevice_group_id(nullptr), m_is_active(false), m_pass_through(false), - m_outputs_min_pool_size(MIN_OUTPUTS_POOL_SIZE), m_outputs_max_pool_size(MAX_OUTPUTS_POOL_SIZE), - m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), - m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), - m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_output_format_type(HAILO_FORMAT_TYPE_AUTO), - m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0), m_input_from_meta(false), - m_no_transform(false), m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), - m_vdevice_key(DEFAULT_VDEVICE_KEY) - {} - - void free_strings() - { - if (m_hef_path.was_changed()) { - g_free(m_hef_path.get()); - } - if (m_device_id.was_changed()) { - g_free(m_device_id.get()); - } - if (m_vdevice_group_id.was_changed()) { - g_free(m_vdevice_group_id.get()); - } - } - - HailoElemProperty m_hef_path; - HailoElemProperty m_batch_size; - HailoElemProperty m_device_id; - HailoElemProperty m_device_count; - HailoElemProperty m_vdevice_group_id; - HailoElemProperty m_is_active; - HailoElemProperty m_pass_through; - HailoElemProperty m_outputs_min_pool_size; - HailoElemProperty m_outputs_max_pool_size; - HailoElemProperty m_scheduling_algorithm; - HailoElemProperty m_scheduler_timeout_ms; - HailoElemProperty m_scheduler_threshold; - HailoElemProperty m_scheduler_priority; - HailoElemProperty m_input_format_type; - HailoElemProperty m_output_format_type; - HailoElemProperty m_nms_score_threshold; - HailoElemProperty m_nms_iou_threshold; - HailoElemProperty m_nms_max_proposals_per_class; - HailoElemProperty m_input_from_meta; - HailoElemProperty m_no_transform; - HailoElemProperty m_multi_process_service; - - // Deprecated - HailoElemProperty m_vdevice_key; -}; - -typedef struct _GstHailoNet2 { - GstElement element; - GstPad *sinkpad; - GstPad *srcpad; - GstQueueArray *input_queue; - GstQueueArray *thread_queue; - std::atomic_uint32_t buffers_in_thread_queue; - std::thread thread; - HailoNet2Properties props; - GstCaps *input_caps; - 
std::atomic_bool is_thread_running; - std::atomic_bool has_got_eos; - - std::unique_ptr vdevice; - std::shared_ptr infer_model; - std::shared_ptr configured_infer_model; - ConfiguredInferModel::Bindings infer_bindings; - bool is_configured; - std::mutex infer_mutex; - - bool has_called_activate; - std::atomic_uint32_t ongoing_frames; - std::condition_variable flush_cv; - std::mutex flush_mutex; - - GstVideoInfo input_frame_info; - - GstHailoAllocator *allocator; - std::unordered_map output_buffer_pools; - std::unordered_map output_vstream_infos; - - std::mutex input_queue_mutex; - std::mutex thread_queue_mutex; - std::condition_variable thread_cv; -} GstHailoNet2; - -typedef struct _GstHailoNet2Class { - GstElementClass parent_class; -} GstHailoNet2Class; - -#define GST_TYPE_HAILONET2 (gst_hailonet2_get_type()) -#define GST_HAILONET2(obj) \ - (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_HAILONET2,GstHailoNet2)) -#define GST_HAILONET2_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_HAILONET2,GstHailoNet2Class)) -#define GST_IS_HAILONET2(obj) \ - (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_HAILONET2)) -#define GST_IS_HAILONET2_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_HAILONET2)) - -GType gst_hailonet2_get_type (void); - -G_END_DECLS - -#endif /* _GST_HAILONET2_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp index 4ae413ee..bcbc350b 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp @@ -17,10 +17,10 @@ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. 
*/ +#include "sync_gsthailonet.hpp" +#include "sync_gst_hailosend.hpp" +#include "sync_gst_hailorecv.hpp" #include "gsthailonet.hpp" -#include "gsthailosend.hpp" -#include "gsthailorecv.hpp" -#include "gsthailonet2.hpp" #include "gsthailodevicestats.hpp" #include "metadata/tensor_meta.hpp" @@ -29,11 +29,11 @@ static gboolean plugin_init(GstPlugin *plugin) (void)gst_tensor_meta_get_info(); (void)gst_tensor_meta_api_get_type(); - return gst_element_register(plugin, "hailonet", GST_RANK_PRIMARY, GST_TYPE_HAILONET) && + return gst_element_register(plugin, "synchailonet", GST_RANK_PRIMARY, GST_TYPE_SYNC_HAILONET) && gst_element_register(plugin, "hailodevicestats", GST_RANK_PRIMARY, GST_TYPE_HAILODEVICESTATS) && gst_element_register(nullptr, "hailosend", GST_RANK_PRIMARY, GST_TYPE_HAILOSEND) && gst_element_register(nullptr, "hailorecv", GST_RANK_PRIMARY, GST_TYPE_HAILORECV) && - gst_element_register(plugin, "hailonet2", GST_RANK_PRIMARY, GST_TYPE_HAILONET2); + gst_element_register(plugin, "hailonet", GST_RANK_PRIMARY, GST_TYPE_HAILONET); } GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, GST_VERSION_MINOR, hailo, "hailo gstreamer plugin", plugin_init, VERSION, diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp index e2b0d085..f5ab1db3 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp @@ -302,7 +302,7 @@ Expected> NetworkGroupConfigManager::con GST_CHECK_EXPECTED(infos, element, RESOURCE, "Failed getting network infos"); if ((infos.release().size() > 1) || (scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE)) { // If cng was already configured - // But hailonet is not running all networks in the cng (or if not using scheduler) - + // But sync_hailonet is not running all networks in the cng (or if not using scheduler) - // Do not use multiplexer! return found_cng; } diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp index c9897143..d205814c 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp @@ -30,7 +30,7 @@ using device_id_t = std::string; using network_name_t = std::string; -using hailonet_name_t = std::string; +using sync_hailonet_name_t = std::string; class NetworkGroupConfigManager final { @@ -52,7 +52,7 @@ class NetworkGroupConfigManager final // TODO: change this map to store only the shared network_groups (used by multiple hailonets with the same vdevices) std::unordered_map> m_configured_net_groups; - std::unordered_map> m_configured_networks; + std::unordered_map> m_configured_networks; std::mutex m_mutex; }; diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp similarity index 98% rename from hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp rename to hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp index 322545a5..d678fa98 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp @@ -17,8 +17,8 @@ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. 
*/ -#include "gsthailorecv.hpp" -#include "gsthailonet.hpp" +#include "sync_gst_hailorecv.hpp" +#include "sync_gsthailonet.hpp" #include "common.hpp" #include "network_group_handle.hpp" #include "metadata/hailo_buffer_flag_meta.hpp" @@ -182,7 +182,7 @@ GstFlowReturn HailoRecvImpl::handle_frame(GstVideoFilter */*filter*/, GstVideoFr switch (meta->flag) { case BUFFER_FLAG_FLUSH: { - hailo_status status = GST_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->signal_was_flushed_event(); + hailo_status status = GST_SYNC_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->signal_was_flushed_event(); GST_CHECK(HAILO_SUCCESS == status, GST_FLOW_ERROR, m_element, RESOURCE, "Signalling was flushed event has failed, status = %d", status); return GST_BASE_TRANSFORM_FLOW_DROPPED; } @@ -195,7 +195,7 @@ GstFlowReturn HailoRecvImpl::handle_frame(GstVideoFilter */*filter*/, GstVideoFr } } - if (!GST_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->is_active()) { + if (!GST_SYNC_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->is_active()) { return GST_FLOW_OK; } @@ -235,7 +235,7 @@ hailo_status HailoRecvImpl::read_from_vstreams(bool should_print_latency) GST_DEBUG("%s latency: %f milliseconds", output_info.vstream().name().c_str(), latency.count()); } gst_buffer_unmap(*buffer, &buffer_info); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return status; } GST_CHECK_SUCCESS(status, m_element, STREAM, "Reading from vstream failed, status = %d", status); diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.hpp similarity index 100% rename from hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.hpp rename to hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.hpp diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp similarity index 97% rename from hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp rename to hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp index 1c4f536c..ecc1f122 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp @@ -17,8 +17,8 @@ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. 
*/ -#include "gsthailosend.hpp" -#include "gsthailonet.hpp" +#include "sync_gst_hailosend.hpp" +#include "sync_gsthailonet.hpp" #include "metadata/hailo_buffer_flag_meta.hpp" #include @@ -87,7 +87,7 @@ Expected> HailoSendImpl::create(GstHailoSend *ele return ptr; } -HailoSendImpl::HailoSendImpl(GstHailoSend *element) : m_element(element), m_hailonet(nullptr), m_props(), +HailoSendImpl::HailoSendImpl(GstHailoSend *element) : m_element(element), m_sync_hailonet(nullptr), m_props(), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), m_last_frame_pts(0) { GST_DEBUG_CATEGORY_INIT(gst_hailosend_debug_category, "hailosend", 0, "debug category for hailosend element"); @@ -136,13 +136,14 @@ GstFlowReturn HailoSendImpl::handle_frame(GstVideoFilter */*filter*/, GstVideoFr assert(nullptr != frame); m_last_frame_pts = GST_BUFFER_TIMESTAMP(frame->buffer); - if (!GST_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->is_active()) { + if (!GST_SYNC_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->is_active()) { GstHailoBufferFlagMeta *meta = GST_HAILO_BUFFER_FLAG_META_ADD(frame->buffer); meta->flag = BUFFER_FLAG_SKIP; return GST_FLOW_OK; } hailo_pix_buffer_t pix_buffer = {}; + pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; pix_buffer.index = 0; pix_buffer.number_of_planes = GST_VIDEO_INFO_N_PLANES(&frame->info); for (uint32_t plane_index = 0; plane_index < pix_buffer.number_of_planes; plane_index++) { @@ -174,7 +175,7 @@ hailo_status HailoSendImpl::write_to_vstreams(const hailo_pix_buffer_t &pix_buff { for (auto &in_vstream : m_input_vstreams) { auto status = in_vstream.write(pix_buffer); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return status; } GST_CHECK_SUCCESS(status, m_element, STREAM, "Failed writing to input vstream %s, status = %d", in_vstream.name().c_str(), status); @@ -201,9 +202,9 @@ GstCaps *HailoSendImpl::get_caps(GstBaseTransform */*trans*/, GstPadDirection /* if (0 == m_input_vstream_infos.size()) { // Init here because it is guaranteed that we have a parent element - m_hailonet = GST_HAILONET(GST_ELEMENT_PARENT(m_element)); + m_sync_hailonet = GST_SYNC_HAILONET(GST_ELEMENT_PARENT(m_element)); - hailo_status status = m_hailonet->impl->set_hef(); + hailo_status status = m_sync_hailonet->impl->set_hef(); if (HAILO_SUCCESS != status) { return NULL; } diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp similarity index 97% rename from hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.hpp rename to hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp index 33a4d7a3..9b84c68f 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp @@ -22,7 +22,7 @@ #include "common.hpp" #include "network_group_handle.hpp" -#include "gsthailonet.hpp" +#include "sync_gsthailonet.hpp" #include #include @@ -92,7 +92,7 @@ class HailoSendImpl final hailo_status write_to_vstreams(const hailo_pix_buffer_t &pix_buffer); GstHailoSend *m_element; - GstHailoNet *m_hailonet; + GstSyncHailoNet *m_sync_hailonet; HailoSendProperties m_props; std::vector m_input_vstream_infos; uint32_t m_batch_size; diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp new file mode 100644 index 00000000..dbfed03c --- /dev/null +++ 
b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp @@ -0,0 +1,992 @@ +/* + * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#include "sync_gsthailonet.hpp" +#include "sync_gst_hailosend.hpp" +#include "sync_gst_hailorecv.hpp" +#include "hailo_events/hailo_events.hpp" +#include "metadata/hailo_buffer_flag_meta.hpp" +#include "hailo/hailort_common.hpp" +#include "hailo/hailort_defaults.hpp" + +#include +#include + +GST_DEBUG_CATEGORY_STATIC(gst_sync_hailonet_debug_category); +#define GST_CAT_DEFAULT gst_sync_hailonet_debug_category + +constexpr std::chrono::milliseconds WAIT_FOR_FLUSH_TIMEOUT_MS(1000); + +static void gst_sync_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); +static void gst_sync_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec); +static gboolean gst_hailorecv_src_pad_event(GstPad *pad, GstObject *parent, GstEvent *event); +static GstPadProbeReturn gst_sync_hailonet_sink_probe(GstPad *pad, GstPadProbeInfo *info, gpointer user_data); +static GstStateChangeReturn gst_sync_hailonet_change_state(GstElement *element, GstStateChange transition); +static void gst_sync_hailonet_flush_callback(GstSyncHailoNet *hailonet, gpointer data); +static void gst_sync_hailonet_inner_queue_overrun_callback(GstElement *queue, gpointer udata); +static void gst_sync_hailonet_inner_queue_underrun_callback(GstElement *queue, gpointer udata); + +enum +{ + PROP_0, + PROP_DEBUG, + PROP_DEVICE_ID, + PROP_HEF_PATH, + PROP_NETWORK_NAME, + PROP_BATCH_SIZE, + PROP_OUTPUTS_MIN_POOL_SIZE, + PROP_OUTPUTS_MAX_POOL_SIZE, + PROP_IS_ACTIVE, + PROP_DEVICE_COUNT, + PROP_VDEVICE_KEY, + PROP_SCHEDULING_ALGORITHM, + PROP_SCHEDULER_TIMEOUT_MS, + PROP_SCHEDULER_THRESHOLD, + PROP_SCHEDULER_PRIORITY, + PROP_MULTI_PROCESS_SERVICE, + PROP_INPUT_FORMAT_TYPE, + PROP_OUTPUT_FORMAT_TYPE, + PROP_NMS_SCORE_THRESHOLD, + PROP_NMS_IOU_THRESHOLD, + PROP_NMS_MAX_PROPOSALS_PER_CLASS, +}; + +G_DEFINE_TYPE(GstSyncHailoNet, gst_sync_hailonet, GST_TYPE_BIN); + +static void gst_sync_hailonet_class_init(GstSyncHailoNetClass *klass) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS(klass); + GstElementClass *element_class = GST_ELEMENT_CLASS(klass); + + GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template)); + + GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + gst_element_class_add_pad_template(element_class, 
gst_static_pad_template_get(&sink_template)); + + gst_element_class_set_static_metadata(element_class, + "sync hailonet element", "Hailo/Network", + "Configure and Activate Hailo Network. " + "Supports the \"flush\" signal which blocks until there are no buffers currently processed in the element. " + "When deactivating a sync hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " + "hailonet, since this operation waits until there are no frames coming in.", + PLUGIN_AUTHOR); + + element_class->change_state = GST_DEBUG_FUNCPTR(gst_sync_hailonet_change_state); + + gobject_class->set_property = gst_sync_hailonet_set_property; + gobject_class->get_property = gst_sync_hailonet_get_property; + g_object_class_install_property(gobject_class, PROP_DEBUG, + g_param_spec_boolean("debug", "Debug flag", "Should print debug information", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_DEVICE_ID, + g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, + g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, + std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, + g_param_spec_uint("vdevice-key", + "Indicate whether to re-use or re-create vdevice", + "Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet. " \ + "If multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", + MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_HEF_PATH, + g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_NETWORK_NAME, + g_param_spec_string("net-name", "Network Name", + "Configure and run this specific network. 
" + "If not passed, configure and run the default network - ONLY if there is one network in the HEF!", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, + g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, + g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", + 0, std::numeric_limits::max(), DEFAULT_OUTPUTS_MIN_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, + g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", + "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), + DEFAULT_OUTPUTS_MAX_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, + g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " + "By default, the hailonet element will not be active unless it is the only one. " + "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, + g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " + "Gets values from the enum GstHailoSchedulingAlgorithms. " + "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " + "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. ", + GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, + g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," + " as long as at least one send request has been sent.", + HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, + g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", + HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, + g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. 
" + "Bigger number represent higher priority", + HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, + g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " + "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", + HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, + g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. ", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, + g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. ", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, + g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // See information about the "flush" signal in the element description + g_signal_new( + "flush", + GST_TYPE_SYNC_HAILONET, + G_SIGNAL_ACTION, + 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 + ); +} + +std::string create_name(std::string prefix, uint32_t id) +{ + return prefix + std::to_string(id); +} + +Expected> HailoSyncNetImpl::create(GstSyncHailoNet *element) +{ + if (nullptr == element) { + return make_unexpected(HAILO_INVALID_ARGUMENT); + } + + auto hailosend_name = create_name("hailosend", HailoSyncNetImpl::m_sync_hailonet_count); + GstElement *hailosend = gst_element_factory_make("hailosend", hailosend_name.c_str()); + if (nullptr == hailosend) { + GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating hailosend element in bin!"), (NULL)); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + g_object_set(hailosend, "qos", FALSE, NULL); + + auto hailoqueue_name = create_name("hailoqueue", HailoSyncNetImpl::m_sync_hailonet_count); + GstElement *queue = gst_element_factory_make("queue", hailoqueue_name.c_str()); + if (nullptr == queue) { + GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating queue element in bin!"), (NULL)); + gst_object_unref(hailosend); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + // Passing 0 disables the features here + g_object_set(queue, "max-size-time", (guint64)0, NULL); + g_object_set(queue, "max-size-bytes", (guint)0, NULL); + g_signal_connect(queue, "overrun", G_CALLBACK(gst_sync_hailonet_inner_queue_overrun_callback), nullptr); + g_signal_connect(queue, "underrun", G_CALLBACK(gst_sync_hailonet_inner_queue_underrun_callback), nullptr); + + auto hailorecv_name = create_name("hailorecv", HailoSyncNetImpl::m_sync_hailonet_count); + GstElement *hailorecv = gst_element_factory_make("hailorecv", hailorecv_name.c_str()); + if (nullptr == hailorecv) { + GST_ELEMENT_ERROR(element, RESOURCE, 
FAILED, ("Failed creating hailorecv element in bin!"), (NULL)); + gst_object_unref(hailosend); + gst_object_unref(queue); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + g_object_set(hailorecv, "qos", FALSE, NULL); + + g_signal_connect(element, "flush", G_CALLBACK(gst_sync_hailonet_flush_callback), nullptr); + + auto was_flushed_event = Event::create_shared(Event::State::not_signalled); + GST_CHECK_EXPECTED(was_flushed_event, element, RESOURCE, "Failed allocating memory for event!"); + + auto ptr = make_unique_nothrow(element, hailosend, queue, hailorecv, was_flushed_event.release()); + if (nullptr == ptr) { + return make_unexpected(HAILO_OUT_OF_HOST_MEMORY); + } + + return ptr; +} + +std::atomic_uint32_t HailoSyncNetImpl::m_sync_hailonet_count(0); +std::mutex HailoSyncNetImpl::m_mutex; +HailoSyncNetImpl::HailoSyncNetImpl(GstSyncHailoNet *element, GstElement *hailosend, GstElement *queue, GstElement *hailorecv, EventPtr was_flushed_event) : + m_element(element), m_props(), m_output_formats(), m_hailosend(hailosend), m_queue(queue), m_hailorecv(hailorecv), + m_net_group_handle(nullptr), m_was_configured(false), m_has_called_activate(false), + m_was_flushed_event(was_flushed_event), m_pool(nullptr) +{ + GST_DEBUG_CATEGORY_INIT(gst_sync_hailonet_debug_category, "sync hailonet", 0, "debug category for sync hailonet element"); + + /* gst_bin_add_many cannot fail. I use this function because the elements are created here and does not come from the outside so, + * gst_bin_add will not fail */ + gst_bin_add_many(GST_BIN(m_element), m_hailosend, m_queue, m_hailorecv, NULL); + init_ghost_sink(); + init_ghost_src(); + + ++m_sync_hailonet_count; +} + +HailoSyncNetImpl::~HailoSyncNetImpl() +{ + if (nullptr != m_pool) { + (void)gst_buffer_pool_set_active(m_pool, FALSE); + } +} + +void HailoSyncNetImpl::init_ghost_sink() +{ + GstPad *pad = gst_element_get_static_pad(m_hailosend, "sink"); + + GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + GstPadTemplate *pad_tmpl = gst_static_pad_template_get(&sink_template); + + GstPad *ghost_pad = gst_ghost_pad_new_from_template("sink", pad, pad_tmpl); + gst_pad_set_active(ghost_pad, TRUE); + gst_element_add_pad(GST_ELEMENT(m_element), ghost_pad); + + gst_pad_add_probe(pad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_sync_hailonet_sink_probe), nullptr, nullptr); + + gst_object_unref(pad_tmpl); + gst_object_unref(pad); +} + +void HailoSyncNetImpl::init_ghost_src() +{ + GstPad *pad = gst_element_get_static_pad(m_hailorecv, "src"); + + GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + GstPadTemplate *pad_tmpl = gst_static_pad_template_get(&src_template); + + GstPad *ghost_pad = gst_ghost_pad_new_from_template("src", pad, pad_tmpl); + gst_pad_set_active(ghost_pad, TRUE); + gst_element_add_pad(GST_ELEMENT(m_element), ghost_pad); + + gst_pad_set_event_function(pad, gst_hailorecv_src_pad_event); + + gst_object_unref(pad_tmpl); + gst_object_unref(pad); +} + +void HailoSyncNetImpl::set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +{ + GST_DEBUG_OBJECT(m_element, "set_property"); + + if ((object == nullptr) || (value == nullptr) || (pspec == nullptr)) { + g_error("set_property got null parameter!"); + return; + } + + switch (property_id) { + case PROP_DEBUG: + { + gboolean debug = g_value_get_boolean(value); + g_object_set(m_hailosend, "debug", debug, NULL); + g_object_set(m_hailorecv, 
"debug", debug, NULL); + break; + } + case PROP_DEVICE_ID: + if (0 != m_props.m_device_count.get()) { + g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", + g_value_get_string(value), m_props.m_device_count.get()); + break; + } + if (m_was_configured) { + g_warning("The network was already configured so changing the device ID will not take place!"); + break; + } + if (nullptr != m_props.m_device_id.get()) { + g_free(m_props.m_device_id.get()); + } + m_props.m_device_id = g_strdup(g_value_get_string(value)); + break; + case PROP_DEVICE_COUNT: + if (nullptr != m_props.m_device_id.get()) { + g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", + m_props.m_device_id.get(), g_value_get_uint(value)); + break; + } + if (m_was_configured) { + g_warning("The network was already configured so changing the device count will not take place!"); + break; + } + m_props.m_device_count = static_cast(g_value_get_uint(value)); + break; + case PROP_VDEVICE_KEY: + if (m_was_configured) { + g_warning("The network was already configured so changing the vdevice key will not take place!"); + break; + } + m_props.m_vdevice_key = static_cast(g_value_get_uint(value)); + break; + case PROP_HEF_PATH: + if (m_was_configured) { + g_warning("The network was already configured so changing the HEF path will not take place!"); + break; + } + if (nullptr != m_props.m_hef_path.get()) { + g_free(m_props.m_hef_path.get()); + } + m_props.m_hef_path = g_strdup(g_value_get_string(value)); + break; + case PROP_NETWORK_NAME: + if (m_was_configured) { + g_warning("The network was already configured so changing the network name will not take place!"); + break; + } + if (nullptr != m_props.m_network_name.get()) { + g_free(m_props.m_network_name.get()); + } + m_props.m_network_name = g_strdup(g_value_get_string(value)); + break; + case PROP_BATCH_SIZE: + if (m_was_configured) { + g_warning("The network was already configured so changing the batch size will not take place!"); + break; + } + m_props.m_batch_size = static_cast(g_value_get_uint(value)); + break; + case PROP_OUTPUTS_MIN_POOL_SIZE: + if (m_was_configured) { + g_warning("The network was already configured so changing the outputs minimum pool size will not take place!"); + break; + } + g_object_set(m_hailorecv, "outputs-min-pool-size", g_value_get_uint(value), NULL); + break; + case PROP_OUTPUTS_MAX_POOL_SIZE: + if (m_was_configured) { + g_warning("The network was already configured so changing the outputs maximum pool size will not take place!"); + break; + } + g_object_set(m_hailorecv, "outputs-max-pool-size", g_value_get_uint(value), NULL); + break; + case PROP_IS_ACTIVE: + { + gboolean new_is_active = g_value_get_boolean(value); + + if (m_props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != m_props.m_scheduling_algorithm.get())) { + g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); + break; + } + + if (m_has_called_activate) { + if (m_props.m_is_active.get() && !new_is_active) { + // Setting this to false before deactivating to signal hailosend and hailorecv to stop inferring + m_props.m_is_active = false; + hailo_status status = deactivate_network_group(); + if (HAILO_SUCCESS != status) { + g_error("Deactivating network group failed, status = %d", status); + return; + } + } else if (!m_props.m_is_active.get() && new_is_active) { + hailo_status status = 
m_net_group_handle->activate_network_group(); + if (HAILO_SUCCESS != status) { + g_error("Failed activating network group, status = %d", status); + break; + } + m_props.m_is_active = true; + } else { + g_warning("Trying to change is-active property state from %d to %d", m_props.m_is_active.get(), new_is_active); + break; + } + } else { + m_props.m_is_active = new_is_active; + } + break; + } + case PROP_SCHEDULING_ALGORITHM: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); + break; + } + if (m_props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { + g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduling_algorithm = static_cast(g_value_get_enum(value)); + break; + case PROP_SCHEDULER_TIMEOUT_MS: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling timeout will not take place!"); + break; + } + if (m_props.m_is_active.was_changed()) { + g_error("scheduler usage (scheduler-timeout-ms) in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduler_timeout_ms = g_value_get_uint(value); + break; + case PROP_SCHEDULER_THRESHOLD: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling threshold will not take place!"); + break; + } + if (m_props.m_is_active.was_changed()) { + g_error("scheduler usage (scheduler-threshold) in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduler_threshold = g_value_get_uint(value); + break; + case PROP_SCHEDULER_PRIORITY: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling priority will not take place!"); + break; + } + if (m_props.m_is_active.was_changed()) { + g_error("scheduler usage (scheduler-priority) in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduler_priority = static_cast(g_value_get_uint(value)); + break; + case PROP_MULTI_PROCESS_SERVICE: + if (m_was_configured) { + g_warning("The network was already configured so changing the multi-process-service property will not take place!"); + break; + } + m_props.m_multi_process_service = g_value_get_boolean(value); + break; + case PROP_INPUT_FORMAT_TYPE: + if (m_was_configured) { + g_warning("The network was already configured so changing the format type will not take place!"); + break; + } + m_props.m_input_format_type = static_cast(g_value_get_enum(value)); + break; + case PROP_OUTPUT_FORMAT_TYPE: + if (m_was_configured) { + g_warning("The network was already configured so changing the format type will not take place!"); + break; + } + m_props.m_output_format_type = static_cast(g_value_get_enum(value)); + break; + case PROP_NMS_SCORE_THRESHOLD: + if (m_was_configured) { + g_warning("The network was already configured so changing the score threshold will not take place!"); + break; + } + m_props.m_nms_score_threshold = static_cast(g_value_get_float(value)); + break; + case PROP_NMS_IOU_THRESHOLD: + if (m_was_configured) { + g_warning("The network was already configured so changing the IoU threshold will not take place!"); + break; + } + m_props.m_nms_iou_threshold = static_cast(g_value_get_float(value)); + break; + case PROP_NMS_MAX_PROPOSALS_PER_CLASS: + if (m_was_configured) { + g_warning("The network was already configured so changing the max 
proposals per class will not take place!"); + break; + } + m_props.m_nms_max_proposals_per_class = static_cast(g_value_get_uint(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); + break; + } +} + +void HailoSyncNetImpl::get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) +{ + GST_DEBUG_OBJECT(m_element, "get_property"); + + if ((object == nullptr) || (value == nullptr) || (pspec == nullptr)) { + g_error("get_property got null parameter!"); + return; + } + + switch (property_id) { + case PROP_DEBUG: + { + gboolean debug; + g_object_get(m_hailosend, "debug", &debug, nullptr); + g_value_set_boolean(value, debug); + break; + } + case PROP_DEVICE_ID: + g_value_set_string(value, m_props.m_device_id.get()); + break; + case PROP_DEVICE_COUNT: + g_value_set_uint(value, m_props.m_device_count.get()); + break; + case PROP_VDEVICE_KEY: + g_value_set_uint(value, m_props.m_vdevice_key.get()); + break; + case PROP_HEF_PATH: + g_value_set_string(value, m_props.m_hef_path.get()); + break; + case PROP_NETWORK_NAME: + g_value_set_string(value, m_props.m_network_name.get()); + break; + case PROP_BATCH_SIZE: + g_value_set_uint(value, m_props.m_batch_size.get()); + break; + case PROP_OUTPUTS_MIN_POOL_SIZE: + { + guint outputs_min_pool_size; + g_object_get(m_hailorecv, "outputs-min-pool-size", &outputs_min_pool_size, nullptr); + g_value_set_uint(value, outputs_min_pool_size); + break; + } + case PROP_OUTPUTS_MAX_POOL_SIZE: + { + guint outputs_max_pool_size; + g_object_get(m_hailorecv, "outputs-max-pool-size", &outputs_max_pool_size, nullptr); + g_value_set_uint(value, outputs_max_pool_size); + break; + } + case PROP_IS_ACTIVE: + g_value_set_boolean(value, m_props.m_is_active.get()); + break; + case PROP_SCHEDULING_ALGORITHM: + g_value_set_enum(value, m_props.m_scheduling_algorithm.get()); + break; + case PROP_SCHEDULER_TIMEOUT_MS: + g_value_set_uint(value, m_props.m_scheduler_timeout_ms.get()); + break; + case PROP_SCHEDULER_THRESHOLD: + g_value_set_uint(value, m_props.m_scheduler_threshold.get()); + break; + case PROP_SCHEDULER_PRIORITY: + g_value_set_uint(value, m_props.m_scheduler_priority.get()); + break; + case PROP_MULTI_PROCESS_SERVICE: + g_value_set_boolean(value, m_props.m_multi_process_service.get()); + break; + case PROP_INPUT_FORMAT_TYPE: + g_value_set_enum(value, m_props.m_input_format_type.get()); + break; + case PROP_OUTPUT_FORMAT_TYPE: + g_value_set_enum(value, m_props.m_output_format_type.get()); + break; + case PROP_NMS_SCORE_THRESHOLD: + g_value_set_float(value, m_props.m_nms_score_threshold.get()); + break; + case PROP_NMS_IOU_THRESHOLD: + g_value_set_float(value, m_props.m_nms_iou_threshold.get()); + break; + case PROP_NMS_MAX_PROPOSALS_PER_CLASS: + g_value_set_uint(value, m_props.m_nms_max_proposals_per_class.get()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); + break; + } +} + +hailo_status HailoSyncNetImpl::set_hef() +{ + m_net_group_handle = make_unique_nothrow(GST_ELEMENT(m_element)); + GST_CHECK(nullptr != m_net_group_handle, HAILO_OUT_OF_HOST_MEMORY, m_element, RESOURCE, "Failed allocating memory for network handle!"); + + hailo_status status = m_net_group_handle->set_hef(m_props.m_device_id.get(), m_props.m_device_count.get(), + m_props.m_vdevice_key.get(), m_props.m_scheduling_algorithm.get(), static_cast(m_props.m_multi_process_service.get()), + m_props.m_hef_path.get()); + if (HAILO_SUCCESS != status) { + return status; + } + + if 
(m_props.m_multi_process_service.get()) { + GST_CHECK(m_props.m_scheduling_algorithm.get() != HAILO_SCHEDULING_ALGORITHM_NONE, + HAILO_INVALID_OPERATION, m_element, RESOURCE, "To use multi-process-service please set scheduling-algorithm."); + } + + if (nullptr == m_props.m_network_name.get()) { + // TODO: HRT-4957 + GST_CHECK(m_net_group_handle->hef()->get_network_groups_names().size() == 1, HAILO_INVALID_ARGUMENT, m_element, RESOURCE, + "A network group has to be specified when there is more than one network group in the HEF!"); + auto network_group_name = m_net_group_handle->hef()->get_network_groups_names()[0]; + + auto networks_infos = m_net_group_handle->hef()->get_network_infos(network_group_name.c_str()); + GST_CHECK_EXPECTED_AS_STATUS(networks_infos, m_element, RESOURCE, "Getting network infos from network group name failed, status %d", networks_infos.status()); + GST_CHECK(networks_infos.value().size() == 1, HAILO_INVALID_ARGUMENT, m_element, RESOURCE, + "A network has to be specified when there is more than one network in the network group!"); + + std::string default_ng_name = HailoRTDefaults::get_network_name(network_group_name); + m_props.m_network_name = g_strdup(default_ng_name.c_str()); + } + + auto input_vstream_infos = m_net_group_handle->hef()->get_input_vstream_infos(m_props.m_network_name.get()); + GST_CHECK_EXPECTED_AS_STATUS(input_vstream_infos, m_element, RESOURCE, "Getting input vstream infos from HEF has failed, status = %d", + input_vstream_infos.status()); + + // TODO: HRT-4095 + GST_CHECK(1 == input_vstream_infos->size(), HAILO_INVALID_OPERATION, m_element, RESOURCE, "sync hailonet element supports only HEFs with one input for now!"); + + auto input_vstream_info = input_vstream_infos.value()[0]; + GST_HAILOSEND(m_hailosend)->impl->set_input_vstream_infos(input_vstream_infos.release()); + GST_HAILOSEND(m_hailosend)->impl->set_batch_size(m_props.m_batch_size.get()); + + GstBufferPool *pool = gst_buffer_pool_new(); + GstStructure *config = gst_buffer_pool_get_config(pool); + + auto frame_size = HailoRTCommon::get_frame_size(input_vstream_info, input_vstream_info.format); + gst_buffer_pool_config_set_params(config, nullptr, frame_size, 1, 1); + + gboolean result = gst_buffer_pool_set_config(pool, config); + GST_CHECK(result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Could not set buffer pool config"); + + result = gst_buffer_pool_set_active(pool, TRUE); + GST_CHECK(result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Could not set buffer pool active"); + + m_pool = pool; + + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::configure_network_group() +{ + std::unique_lock lock(m_mutex); + g_object_set(m_queue, "max-size-buffers", MAX_BUFFER_COUNT(m_props.m_batch_size.get()), NULL); + + auto network_group_name = get_network_group_name(m_props.m_network_name.get()); + GST_CHECK_EXPECTED_AS_STATUS(network_group_name, m_element, RESOURCE, "Could not get network group name from name %s, status = %d", + m_props.m_network_name.get(), network_group_name.status()); + + hailo_status status = m_net_group_handle->configure_network_group(network_group_name->c_str(), m_props.m_scheduling_algorithm.get(), m_props.m_batch_size.get()); + if (HAILO_SUCCESS != status) { + return status; + } + m_was_configured = true; + + if (m_props.m_scheduler_timeout_ms.was_changed()) { + status = m_net_group_handle->set_scheduler_timeout(m_props.m_network_name.get(), m_props.m_scheduler_timeout_ms.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler timeout 
failed, status = %d", status); + } + if (m_props.m_scheduler_threshold.was_changed()) { + status = m_net_group_handle->set_scheduler_threshold(m_props.m_network_name.get(), m_props.m_scheduler_threshold.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler threshold failed, status = %d", status); + } + if (m_props.m_scheduler_priority.was_changed()) { + status = m_net_group_handle->set_scheduler_priority(m_props.m_network_name.get(), m_props.m_scheduler_priority.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status); + } + + auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats, + m_props.m_input_format_type.get(), m_props.m_output_format_type.get()); + GST_CHECK_EXPECTED_AS_STATUS(vstreams, m_element, RESOURCE, "Creating vstreams failed, status = %d", vstreams.status()); + + GST_HAILOSEND(m_hailosend)->impl->set_input_vstreams(std::move(vstreams->first)); + + // Check that if one of the NMS params is changed, we have NMS outputs in the model + auto has_nms_output = std::any_of(vstreams->second.begin(), vstreams->second.end(), [](const auto &vs) + { + return HailoRTCommon::is_nms(vs.get_info()); + }); + + for (auto &out_vs : vstreams->second) { + if (m_props.m_nms_score_threshold.was_changed()) { + GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS score threshold is set, but there is no NMS output in this model."); + if (HailoRTCommon::is_nms(out_vs.get_info())) { + status = out_vs.set_nms_score_threshold(m_props.m_nms_score_threshold.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS score threshold failed, status = %d", status); + } + } + if (m_props.m_nms_iou_threshold.was_changed()) { + GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS IoU threshold is set, but there is no NMS output in this model."); + if (HailoRTCommon::is_nms(out_vs.get_info())) { + status = out_vs.set_nms_iou_threshold(m_props.m_nms_iou_threshold.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS IoU threshold failed, status = %d", status); + } + } + if (m_props.m_nms_max_proposals_per_class.was_changed()) { + GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS max proposals per class is set, but there is no NMS output in this model."); + if (HailoRTCommon::is_nms(out_vs.get_info())) { + status = out_vs.set_nms_max_proposals_per_class(m_props.m_nms_max_proposals_per_class.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS max proposals per class failed, status = %d", status); + } + } + } + + status = GST_HAILORECV(m_hailorecv)->impl->set_output_vstreams(std::move(vstreams->second), m_props.m_batch_size.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting output vstreams failed, status = %d", status); + + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::activate_hailonet() +{ + if (HAILO_SCHEDULING_ALGORITHM_NONE != m_props.m_scheduling_algorithm.get()) { + m_props.m_is_active = true; + return HAILO_SUCCESS; + } + + if ((1 == m_sync_hailonet_count) && (!m_props.m_is_active.was_changed())) { + m_props.m_is_active = true; + } + + if (m_props.m_is_active.get()) { + std::unique_lock lock(m_mutex); + hailo_status status = m_net_group_handle->activate_network_group(); + if (HAILO_SUCCESS != status) { + return status; + } + } + + m_has_called_activate = true; + + return HAILO_SUCCESS; +} + +Expected 
HailoSyncNetImpl::get_network_group_name(const std::string &network_name) +{ + for (const auto &network_group_name : m_net_group_handle->hef()->get_network_groups_names()) { + // Look for network_group with the given name + if ((network_name == network_group_name) || (network_name == HailoRTDefaults::get_network_name(network_group_name))) { + return std::string(network_group_name); + } + + auto network_infos = m_net_group_handle->hef()->get_network_infos(network_group_name); + GST_CHECK_EXPECTED(network_infos, m_element, RESOURCE, "Could not get network infos of group %s, status = %d", network_group_name.c_str(), + network_infos.status()); + + // Look for network with the given name + for (const auto &network_info : network_infos.value()) { + if (network_name == network_info.name) { + return std::string(network_group_name); + } + } + } + + GST_ELEMENT_ERROR(m_element, RESOURCE, FAILED, ("Failed to get network group name from the name %s!", network_name.c_str()), (NULL)); + return make_unexpected(HAILO_NOT_FOUND); +} + +hailo_status HailoSyncNetImpl::link_elements() +{ + /* Link elements here because only here we have the HEF and the Caps format */ + if (!gst_element_link_many(m_hailosend, m_queue, m_hailorecv, NULL)) { + GST_ELEMENT_ERROR(m_element, RESOURCE, FAILED, ("Could not link elements in bin!"), (NULL)); + return HAILO_INTERNAL_FAILURE; + } + + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::abort_streams() +{ + if (!m_props.m_is_active.get()) { + return HAILO_SUCCESS; + } + + auto status = GST_HAILOSEND(m_hailosend)->impl->abort_vstreams(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed aborting input VStreams of hailosend, status = %d", status); + status = GST_HAILORECV(m_hailorecv)->impl->abort_vstreams(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed aborting output VStreams of hailorecv, status = %d", status); + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::deactivate_network_group() +{ + auto was_deactivated = m_net_group_handle->remove_network_group(); + GST_CHECK_EXPECTED_AS_STATUS(was_deactivated, m_element, RESOURCE, "Failed removing network, status = %d", was_deactivated.status()); + + if (was_deactivated.value()) { + return clear_vstreams(); + } + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::clear_vstreams() +{ + if (nullptr != GST_HAILOSEND(m_hailosend)->impl) { + hailo_status status = GST_HAILOSEND(m_hailosend)->impl->clear_vstreams(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed clearing input VStreams of hailosend, status = %d", status); + } + + if (nullptr != GST_HAILORECV(m_hailorecv)->impl) { + hailo_status status = GST_HAILORECV(m_hailorecv)->impl->clear_vstreams(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed clearing output VStreams of hailorecv, status = %d", status); + } + + return HAILO_SUCCESS; +} + +gboolean HailoSyncNetImpl::src_pad_event(GstEvent *event) +{ + assert(nullptr != event); + + auto parsed_event = HailoSetOutputFormatEvent::parse(event); + if (HAILO_SUCCESS != parsed_event.status()) { + return FALSE; + } + + m_output_formats = std::move(parsed_event->formats); + return TRUE; +} + +GstPadProbeReturn HailoSyncNetImpl::sink_probe() +{ + hailo_status status = activate_hailonet(); + GST_CHECK(HAILO_SUCCESS == status, GST_PAD_PROBE_REMOVE, m_element, RESOURCE, "Failed activating network, status = %d", status); + return GST_PAD_PROBE_REMOVE; +} + +gboolean HailoSyncNetImpl::is_active() +{ + return m_props.m_is_active.get(); +} + +hailo_status 
HailoSyncNetImpl::flush() +{ + GstBuffer *buffer = nullptr; + GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(m_pool, &buffer, nullptr); + GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Acquire buffer failed!"); + + GstHailoBufferFlagMeta *buffer_meta = GST_HAILO_BUFFER_FLAG_META_ADD(buffer); + buffer_meta->flag = BUFFER_FLAG_FLUSH; + GST_BUFFER_TIMESTAMP(buffer) = GST_HAILOSEND(m_hailosend)->impl->last_frame_pts(); + + GstPad *pad = gst_element_get_static_pad(m_hailosend, "src"); + flow_result = gst_pad_push(pad, buffer); + GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Pushing buffer to queue has failed!"); + + hailo_status status = m_was_flushed_event->wait(WAIT_FOR_FLUSH_TIMEOUT_MS); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed waiting for flushed event, status = %d", status); + + status = m_was_flushed_event->reset(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed resetting flushed event, status = %d", status); + + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::signal_was_flushed_event() +{ + return m_was_flushed_event->signal(); +} + +static void gst_sync_hailonet_init(GstSyncHailoNet *self) +{ + if (!do_versions_match(GST_ELEMENT(self))) { + return; + } + + auto sync_hailonet_impl = HailoSyncNetImpl::create(self); + if (!sync_hailonet_impl) { + GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Creating sync hailonet implementation has failed! status = %d", sync_hailonet_impl.status()), (NULL)); + return; + } + + self->impl = sync_hailonet_impl.release(); +} + +static void gst_sync_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +{ + GST_SYNC_HAILONET(object)->impl->set_property(object, property_id, value, pspec); +} + +static void gst_sync_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) +{ + GST_SYNC_HAILONET(object)->impl->get_property(object, property_id, value, pspec); +} + +static gboolean gst_hailorecv_src_pad_event(GstPad */*pad*/, GstObject *parent, GstEvent *event) +{ + gboolean result = GST_SYNC_HAILONET(GST_ELEMENT_PARENT(parent))->impl->src_pad_event(event); + if (result) { + return TRUE; + } + + GstBaseTransform *trans = GST_BASE_TRANSFORM_CAST(parent); + return GST_BASE_TRANSFORM_GET_CLASS(trans)->src_event(trans, event); +} + +static GstPadProbeReturn gst_sync_hailonet_sink_probe(GstPad *pad, GstPadProbeInfo */*info*/, gpointer /*user_data*/) +{ + return GST_SYNC_HAILONET(GST_ELEMENT_PARENT(gst_pad_get_parent(pad)))->impl->sink_probe(); +} + +static GstStateChangeReturn gst_sync_hailonet_change_state(GstElement *element, GstStateChange transition) +{ + GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_sync_hailonet_parent_class)->change_state(element, transition); + if (GST_STATE_CHANGE_FAILURE == ret) { + return ret; + } + + auto &sync_hailonet = GST_SYNC_HAILONET(element)->impl; + switch (transition) { + case GST_STATE_CHANGE_NULL_TO_READY: + { + hailo_status status = sync_hailonet->link_elements(); + GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Linking elements has failed, status = %d\n", status); + break; + } + case GST_STATE_CHANGE_READY_TO_PAUSED: + { + hailo_status status = sync_hailonet->configure_network_group(); + GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Configuring network group failed, status = %d\n", status); + break; + } + case GST_STATE_CHANGE_PLAYING_TO_PAUSED: + { + 
hailo_status status = sync_hailonet->abort_streams(); + GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Aborting streams has failed, status = %d\n", status); + break; + } + case GST_STATE_CHANGE_READY_TO_NULL: + { + if (HAILO_SCHEDULING_ALGORITHM_NONE == sync_hailonet->get_props().m_scheduling_algorithm.get()) { + auto status = sync_hailonet->deactivate_network_group(); + GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Deactivating network group failed, status = %d\n", status); + } + + // Cleanup all of hailonet memory + sync_hailonet.reset(); + break; + } + default: + break; + } + + return ret; +} + +static void gst_sync_hailonet_flush_callback(GstSyncHailoNet *sync_hailonet, gpointer /*data*/) +{ + (void)sync_hailonet->impl->flush(); +} + +static void gst_sync_hailonet_inner_queue_overrun_callback(GstElement *queue, gpointer /*udata*/) +{ + if (GST_SYNC_HAILONET(GST_ELEMENT_PARENT(queue))->impl->is_active()) { + GST_INFO("Inner queue of %s is overrun!", GST_ELEMENT_NAME(GST_ELEMENT_PARENT(queue))); + } +} + +static void gst_sync_hailonet_inner_queue_underrun_callback(GstElement *queue, gpointer /*udata*/) +{ + if (GST_SYNC_HAILONET(GST_ELEMENT_PARENT(queue))->impl->is_active()) { + GST_INFO("Inner queue of %s is underrun!", GST_ELEMENT_NAME(GST_ELEMENT_PARENT(queue))); + } +} \ No newline at end of file diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp new file mode 100644 index 00000000..00b26125 --- /dev/null +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */
+#ifndef _GST_SYNC_HAILONET_HPP_
+#define _GST_SYNC_HAILONET_HPP_
+
+#include "common.hpp"
+#include "network_group_handle.hpp"
+#include "hailo/expected.hpp"
+#include "hailo/event.hpp"
+
+#include <gst/gst.h>
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <vector>
+
+G_BEGIN_DECLS
+
+#define GST_TYPE_SYNC_HAILONET (gst_sync_hailonet_get_type())
+#define GST_SYNC_HAILONET(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_SYNC_HAILONET,GstSyncHailoNet))
+#define GST_SYNC_HAILONET_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_SYNC_HAILONET,GstSyncHailoNetClass))
+#define GST_IS_SYNC_HAILONET(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_SYNC_HAILONET))
+#define GST_IS_SYNC_HAILONET_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_SYNC_HAILONET))
+
+class HailoSyncNetImpl;
+struct GstSyncHailoNet
+{
+    GstBin parent;
+    std::unique_ptr<HailoSyncNetImpl> impl;
+};
+
+struct GstSyncHailoNetClass
+{
+    GstBinClass parent;
+};
+
+struct SyncHailoNetProperties final
+{
+public:
+    SyncHailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE),
+        m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN),
+        m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL),
+        m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_format_type(HAILO_FORMAT_TYPE_AUTO),
+        m_output_format_type(HAILO_FORMAT_TYPE_AUTO), m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0)
+
+    {}
+
+    HailoElemProperty<gchar*> m_device_id;
+    HailoElemProperty<gchar*> m_hef_path;
+    HailoElemProperty<gchar*> m_network_name; // This property can be a network group name or a network name
+    HailoElemProperty<guint16> m_batch_size;
+    HailoElemProperty<gboolean> m_is_active;
+    HailoElemProperty<guint16> m_device_count;
+    HailoElemProperty<guint32> m_vdevice_key;
+    HailoElemProperty<hailo_scheduling_algorithm_t> m_scheduling_algorithm;
+    HailoElemProperty<guint32> m_scheduler_timeout_ms;
+    HailoElemProperty<guint32> m_scheduler_threshold;
+    HailoElemProperty<guint8> m_scheduler_priority;
+    HailoElemProperty<gboolean> m_multi_process_service;
+    HailoElemProperty<hailo_format_type_t> m_input_format_type;
+    HailoElemProperty<hailo_format_type_t> m_output_format_type;
+    HailoElemProperty<gfloat> m_nms_score_threshold;
+    HailoElemProperty<gfloat> m_nms_iou_threshold;
+    HailoElemProperty<guint32> m_nms_max_proposals_per_class;
+};
+
+class HailoSyncNetImpl final
+{
+public:
+    static Expected<std::unique_ptr<HailoSyncNetImpl>> create(GstSyncHailoNet *element);
+    HailoSyncNetImpl(GstSyncHailoNet *element, GstElement *hailosend, GstElement *queue, GstElement *hailorecv, EventPtr was_flushed_event);
+    ~HailoSyncNetImpl();
+
+    void set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec);
+    void get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec);
+    hailo_status set_hef();
+    hailo_status link_elements();
+    hailo_status configure_network_group();
+    hailo_status activate_hailonet();
+    hailo_status abort_streams();
+
+    gboolean src_pad_event(GstEvent *event);
+    GstPadProbeReturn sink_probe();
+    gboolean is_active();
+    hailo_status flush();
+    hailo_status signal_was_flushed_event();
+
+    hailo_status deactivate_network_group();
+    SyncHailoNetProperties &get_props() {
+        return m_props;
+    }
+
+private:
+    void init_ghost_sink();
+    void init_ghost_src();
+    Expected<std::string> get_network_group_name(const std::string &network_name);
+
+    hailo_status clear_vstreams();
+
+    static std::atomic_uint32_t m_sync_hailonet_count;
+    static std::mutex m_mutex;
+    GstSyncHailoNet *m_element;
+    SyncHailoNetProperties m_props;
+    std::vector<hailo_format_t> m_output_formats;
+    GstElement *m_hailosend;
+    GstElement *m_queue;
+    GstElement *m_hailorecv;
+    std::unique_ptr<NetworkGroupHandle> m_net_group_handle;
+    bool m_was_configured;
+    bool m_has_called_activate;
+    EventPtr m_was_flushed_event;
+    GstBufferPool *m_pool;
+};
+
+GType gst_sync_hailonet_get_type(void);
+
+G_END_DECLS
+
+#endif /* _GST_SYNC_HAILONET_HPP_ */
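Once the plugin is installed, this element is driven entirely through the properties declared above. The snippet below is illustrative only: it assumes the element is registered under the name `synchailonet`, that the GStreamer property names follow the usual member-to-property mapping (`m_hef_path` → `hef-path`, `m_is_active` → `is-active`), and that `model.hef` is a hypothetical HEF path.

```python
# Illustrative only: element name "synchailonet", property names and "model.hef" are assumptions.
import gi
gi.require_version("Gst", "1.0")
from gi.repository import Gst

Gst.init(None)
pipeline = Gst.parse_launch(
    "videotestsrc num-buffers=100 ! synchailonet hef-path=model.hef is-active=true ! fakesink"
)
pipeline.set_state(Gst.State.PLAYING)
# Block until the stream finishes or errors out, then tear the pipeline down.
pipeline.get_bus().timed_pop_filtered(Gst.CLOCK_TIME_NONE,
                                      Gst.MessageType.EOS | Gst.MessageType.ERROR)
pipeline.set_state(Gst.State.NULL)
```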
diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
index 2c3e5475..f0d8ea98 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
+++ b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
@@ -127,10 +127,8 @@ def _raise_indicative_status_exception(self, libhailort_exception):
         raise HailoRTInvalidFrameException("An invalid frame was received") from libhailort_exception
     if string_error_code == "HAILO_TIMEOUT":
         raise HailoRTTimeout("Received a timeout - hailort has failed because a timeout had occurred") from libhailort_exception
-    if string_error_code == "HAILO_STREAM_ABORTED_BY_HW":
-        raise HailoRTStreamAborted("Stream aborted due to an external event") from libhailort_exception
-    if string_error_code == "HAILO_STREAM_ABORTED_BY_USER":
-        raise HailoRTStreamAbortedByUser("Stream was aborted by user") from libhailort_exception
+    if string_error_code == "HAILO_STREAM_ABORT":
+        raise HailoRTStreamAborted("Stream was aborted") from libhailort_exception
     if string_error_code == "HAILO_INVALID_OPERATION":
         raise HailoRTInvalidOperationException("Invalid operation. See hailort.log for more information") from libhailort_exception
@@ -720,11 +718,10 @@ def get_vstream_names_from_stream_name(self, stream_name):
         return self._configured_network.get_vstream_names_from_stream_name(stream_name)

     def set_scheduler_timeout(self, timeout_ms, network_name=None):
-        """Sets the maximum time period that may pass before getting run time from the scheduler,
-        even without reaching the minimum required send requests (e.g. threshold - see set_scheduler_threshold()),
-        as long as at least one send request has been sent.
-        This time period is measured since the last time the scheduler gave this network group run time.
-
+        """Sets the maximum time period that may pass before receiving run time from the scheduler.
+        This will occur provided that at least one send request has been sent; there is no minimum requirement
+        on the number of send requests (e.g. threshold - see set_scheduler_threshold()).
+
         Args:
             timeout_ms (int): Timeout in milliseconds.
         """
@@ -1058,10 +1055,25 @@ def set_nms_max_proposals_per_class(self, max_proposals_per_class):
             max_proposals_per_class (int): NMS max proposals per class to set.

         Note:
+            This function must be called before starting inference!
             This function will fail in cases where there is no output with NMS operations on the CPU.
         """
         return self._infer_pipeline.set_nms_max_proposals_per_class(max_proposals_per_class)

+    def set_nms_max_accumulated_mask_size(self, max_accumulated_mask_size):
+        """Set maximum accumulated mask size for all the detections in a frame.
+        Used in order to change the output buffer frame size,
+        in cases where the output buffer is too small for all the segmentation detections.
+
+        Args:
+            max_accumulated_mask_size (int): NMS max accumulated mask size.
+
+        Note:
+            This function must be called before starting inference!
+            This function will fail in cases where there is no output with NMS operations on the CPU.
+        """
+        return self._infer_pipeline.set_nms_max_accumulated_mask_size(max_accumulated_mask_size)
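Taken together, the new Python surface looks roughly like the sketch below. It is a minimal sketch, not a snippet from the repo: `model.hef`, the input shape and the mask-size budget are made-up placeholders, and the scheduler call assumes a VDevice with the scheduler enabled (the default).

```python
# Minimal sketch: "model.hef", the input shape and the 8 MB mask budget are placeholders.
import numpy as np
from hailo_platform import (VDevice, HEF, ConfigureParams, InferVStreams,
                            InputVStreamParams, OutputVStreamParams, HailoStreamInterface)
from hailo_platform.pyhailort.pyhailort import HailoRTStreamAborted

hef = HEF("model.hef")  # hypothetical segmentation model with a CPU-side NMS output
with VDevice() as vdevice:
    params = ConfigureParams.create_from_hef(hef, interface=HailoStreamInterface.PCIe)
    network_group = vdevice.configure(hef, params)[0]
    network_group.set_scheduler_timeout(100)  # get run time after 100 ms even below the threshold

    with InferVStreams(network_group,
                       InputVStreamParams.make(network_group),
                       OutputVStreamParams.make(network_group)) as infer_pipeline:
        # Must be called before inference starts; fails if no CPU-side NMS output exists.
        infer_pipeline.set_nms_max_accumulated_mask_size(8 * 1024 * 1024)
        input_name = hef.get_input_vstream_infos()[0].name
        frame = np.zeros((1, 224, 224, 3), dtype=np.uint8)  # shape is model-dependent
        try:
            results = infer_pipeline.infer({input_name: frame})
        except HailoRTStreamAborted:
            pass  # HAILO_STREAM_ABORT surfaced from libhailort, e.g. on shutdown
```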
+
     def __exit__(self, *args):
         self._infer_pipeline.release()
         return False
@@ -1487,8 +1499,8 @@ class HailoFormatFlags(_pyhailort.FormatFlags):
 SUPPORTED_PROTOCOL_VERSION = 2
 SUPPORTED_FW_MAJOR = 4
-SUPPORTED_FW_MINOR = 16
-SUPPORTED_FW_REVISION = 2
+SUPPORTED_FW_MINOR = 17
+SUPPORTED_FW_REVISION = 0

 MEGA_MULTIPLIER = 1000.0 * 1000.0
@@ -3120,6 +3132,20 @@ def set_nms_max_proposals_per_class(self, max_proposals_per_class):
         """
         return self._recv_object.set_nms_max_proposals_per_class(max_proposals_per_class)

+    def set_nms_max_accumulated_mask_size(self, max_accumulated_mask_size):
+        """Set maximum accumulated mask size for all the detections in a frame.
+        Used in order to change the output buffer frame size,
+        in cases where the output buffer is too small for all the segmentation detections.
+
+        Args:
+            max_accumulated_mask_size (int): NMS max accumulated mask size.
+
+        Note:
+            This function must be called before starting inference!
+            This function will fail in cases where there is no output with NMS operations on the CPU.
+        """
+        return self._recv_object.set_nms_max_accumulated_mask_size(max_accumulated_mask_size)
+

 class OutputVStreams(object):
     """Output virtual streams pipelines that allows to receive data, to be used as a context manager."""
diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py b/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py
index b6620c6e..a3590887 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py
+++ b/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py
@@ -23,7 +23,7 @@ class PlatformCommands:
     'fw-update': ('Firmware update tool', FWUpdaterCLI),
     'ssb-update': ('Second stage boot update tool', SSBUpdaterCLI),
     'fw-config': ('Firmware configuration tool', FWConfigCommandCLI),
-    'udp-rate-limiter': ('Limit UDP rate', UDPRateLimiterCLI),
+    'udp-rate-limiter': ('Limit the UDP rate', UDPRateLimiterCLI),
     'fw-control': ('Useful firmware control operations', ControlCommandCLI),
     'fw-logger': ('Download fw logs to a file', LoggerCommandCLI),
     'scan': ('Scans for devices (Ethernet or PCIE)', ScanCommandCLI),
diff --git a/hailort/libhailort/bindings/python/platform/setup.py b/hailort/libhailort/bindings/python/platform/setup.py
index f6b5f5ad..5ba4ce3c 100644
--- a/hailort/libhailort/bindings/python/platform/setup.py
+++ b/hailort/libhailort/bindings/python/platform/setup.py
@@ -69,6 +69,6 @@ def _get_package_paths():
         "linux_aarch64",
     ],
     url="https://hailo.ai/",
-    version="4.16.2",
+    version="4.17.0",
     zip_safe=False,
 )
diff --git a/hailort/libhailort/bindings/python/src/CMakeLists.txt b/hailort/libhailort/bindings/python/src/CMakeLists.txt
index 91c03fb5..d22a0987 100644
--- a/hailort/libhailort/bindings/python/src/CMakeLists.txt
+++ b/hailort/libhailort/bindings/python/src/CMakeLists.txt
@@ -49,7 +49,7 @@ set_target_properties(_pyhailort PROPERTIES
     # VISIBILITY_INLINES_HIDDEN YES
 )

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)
 target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort)

 if(WIN32)
diff --git a/hailort/libhailort/bindings/python/src/bindings_common.hpp b/hailort/libhailort/bindings/python/src/bindings_common.hpp
index d135e52c..2367af4e 100644
---
a/hailort/libhailort/bindings/python/src/bindings_common.hpp +++ b/hailort/libhailort/bindings/python/src/bindings_common.hpp @@ -48,7 +48,7 @@ class HailoRTBindingsCommon case HAILO_FORMAT_ORDER_HAILO_NMS: return { HailoRTCommon::get_nms_host_shape_size(vstream_info.nms_shape) }; case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: { - return { HailoRTCommon::get_nms_with_byte_mask_host_shape_size(vstream_info.nms_shape, user_format) }; + return {HailoRTCommon::get_nms_host_frame_size(vstream_info.nms_shape, user_format) / HailoRTCommon::get_format_data_bytes(user_format)}; } case HAILO_FORMAT_ORDER_NC: return {shape.features}; diff --git a/hailort/libhailort/bindings/python/src/internal/CMakeLists.txt b/hailort/libhailort/bindings/python/src/internal/CMakeLists.txt deleted file mode 100644 index 0e85942c..00000000 --- a/hailort/libhailort/bindings/python/src/internal/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -cmake_minimum_required(VERSION 3.15.0) - -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/readerwriterqueue.cmake) - -pybind11_add_module(_pyhailort_internal SHARED - pyhailort_internal.cpp - control_api.cpp - $ -) - -add_dependencies(_pyhailort_internal libhailort) - -set_target_properties(_pyhailort_internal PROPERTIES - CXX_STANDARD 14 - CXX_STANDARD_REQUIRED YES -) - -target_include_directories(_pyhailort_internal - PRIVATE - $ - $ - $ - $ - $ - $ -) - -target_link_libraries(_pyhailort_internal PRIVATE - hef_proto - spdlog::spdlog - readerwriterqueue - profiler_proto - scheduler_mon_proto) -if(HAILO_BUILD_SERVICE) - target_link_libraries(_pyhailort_internal PRIVATE grpc++_unsecure hailort_rpc_grpc_proto) -endif() - -if(WIN32) - target_link_libraries(_pyhailort_internal PRIVATE Ws2_32 Iphlpapi Shlwapi) -endif() - -target_compile_options(_pyhailort_internal PRIVATE ${HAILORT_COMPILE_OPTIONS}) -exclude_archive_libs_symbols(_pyhailort_internal) diff --git a/hailort/libhailort/bindings/python/src/internal/control_api.cpp b/hailort/libhailort/bindings/python/src/internal/control_api.cpp deleted file mode 100644 index 07d790c7..00000000 --- a/hailort/libhailort/bindings/python/src/internal/control_api.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file control_api.cpp - * @brief Defines binding to control functions - * - **/ - -#include "control_api.hpp" -#include "utils.hpp" -#include "hailo/device.hpp" -#include "common/utils.hpp" - -namespace hailort -{ - -void ControlWrapper::set_clock_freq(DeviceWrapper &device, uint32_t clock_freq) -{ - auto status = Control::set_clock_freq(*device, clock_freq); - VALIDATE_STATUS(status); -} - -void ControlWrapper::close_all_streams(DeviceWrapper &device) -{ - auto status = Control::close_all_streams(*device); - VALIDATE_STATUS(status); -} - -void ControlWrapper::config_ahb_to_axi(DeviceWrapper &device, bool use_64bit_data_only) -{ - CONTROL_PROTOCOL__config_core_top_type_t config_type = CONTROL_PROTOCOL__CONFIG_CORE_TOP_TYPE_AHB_TO_AXI; - CONTROL_PROTOCOL__config_core_top_params_t params = {0}; - params.ahb_to_axi.enable_use_64bit_data_only = use_64bit_data_only; - - auto status = Control::config_core_top(*device, config_type, ¶ms); - VALIDATE_STATUS(status); -} - -void ControlWrapper::phy_operation(DeviceWrapper &device, CONTROL_PROTOCOL__phy_operation_t operation_type) -{ - auto status = Control::phy_operation(*device, operation_type); - VALIDATE_STATUS(status); -} - -uint32_t ControlWrapper::latency_measurement_read(DeviceWrapper &device) -{ - uint32_t inbound_to_outbound_latency_nsec = 0; - - auto status = Control::latency_measurement_read(*device, &inbound_to_outbound_latency_nsec); - VALIDATE_STATUS(status); - - return inbound_to_outbound_latency_nsec; -} - -void ControlWrapper::latency_measurement_config(DeviceWrapper &device, uint8_t latency_measurement_en, - uint32_t inbound_start_buffer_number, uint32_t outbound_stop_buffer_number, uint32_t inbound_stream_index, - uint32_t outbound_stream_index) -{ - auto status = Control::latency_measurement_config(*device, latency_measurement_en, inbound_start_buffer_number, - outbound_stop_buffer_number, inbound_stream_index, outbound_stream_index); - VALIDATE_STATUS(status); -} - -void ControlWrapper::start_firmware_update(DeviceWrapper &device) -{ - auto status = Control::start_firmware_update(*device); - VALIDATE_STATUS(status); -} - -void ControlWrapper::finish_firmware_update(DeviceWrapper &device) -{ - auto status = Control::finish_firmware_update(*device); - VALIDATE_STATUS(status); -} - -void ControlWrapper::write_firmware_update(DeviceWrapper &device, uint32_t offset, py::bytes data, uint32_t length) -{ - auto status = Control::write_firmware_update(*device, offset, (uint8_t*)std::string(data).c_str(), length); - VALIDATE_STATUS(status); -} - -void ControlWrapper::validate_firmware_update(DeviceWrapper &device, py::bytes md5_raw_data, uint32_t firmware_size) -{ - MD5_SUM_t expected_md5 = {0}; - memcpy(&expected_md5, (uint8_t*)std::string(md5_raw_data).c_str(), sizeof(expected_md5)); - - auto status = Control::validate_firmware_update(*device, &expected_md5, firmware_size); - VALIDATE_STATUS(status); -} - -py::bytes ControlWrapper::sensor_get_config(DeviceWrapper &device, uint32_t section_index, uint32_t offset, uint32_t data_length) -{ - std::unique_ptr response = make_unique_nothrow(data_length, '\x00'); - VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY); - - auto status = Control::sensor_get_config(*device, section_index, offset, data_length, (uint8_t*)(response->data())); - VALIDATE_STATUS(status); - - return *response; -} - -void ControlWrapper::idle_time_set_measurement(DeviceWrapper &device, bool measurement_enable) -{ - auto status = 
Control::idle_time_set_measurement(*device, measurement_enable); - VALIDATE_STATUS(status); -} - -uint64_t ControlWrapper::idle_time_get_measurement(DeviceWrapper &device) -{ - uint64_t measurement = 0; - - auto status = Control::idle_time_get_measurement(*device, &measurement); - VALIDATE_STATUS(status); - - return measurement; -} - -void ControlWrapper::d2h_notification_manager_set_host_info(DeviceWrapper &device, uint16_t host_port, uint32_t host_ip_address) -{ - auto status = Control::d2h_notification_manager_set_host_info(*device, host_port, host_ip_address); - VALIDATE_STATUS(status); -} - -void ControlWrapper::d2h_notification_manager_send_host_info_notification(DeviceWrapper &device, uint8_t notification_priority) -{ - auto status = Control::d2h_notification_manager_send_host_info_notification(*device, notification_priority); - VALIDATE_STATUS(status); -} - -/* Context switch */ -void ControlWrapper::set_context_switch_breakpoint(DeviceWrapper &device, - uint8_t breakpoint_id, - bool break_at_any_network_group_index, uint8_t network_group_index, - bool break_at_any_batch_index, uint16_t batch_index, - bool break_at_any_context_index,uint8_t context_index, - bool break_at_any_action_index, uint16_t action_index) -{ - CONTROL_PROTOCOL__context_switch_breakpoint_control_t breakpoint_control = - CONTROL_PROTOCOL__CONTEXT_SWITCH_BREAKPOINT_CONTROL_SET; - CONTROL_PROTOCOL__context_switch_breakpoint_data_t breakpoint_data = { - break_at_any_network_group_index, - network_group_index, - break_at_any_batch_index, - batch_index, - break_at_any_context_index, - context_index, - break_at_any_action_index, - action_index}; - - auto status = Control::config_context_switch_breakpoint(*device, breakpoint_id, breakpoint_control, &breakpoint_data); - VALIDATE_STATUS(status); -} - -void ControlWrapper::continue_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id) -{ - CONTROL_PROTOCOL__context_switch_breakpoint_control_t breakpoint_control = - CONTROL_PROTOCOL__CONTEXT_SWITCH_BREAKPOINT_CONTROL_CONTINUE; - CONTROL_PROTOCOL__context_switch_breakpoint_data_t breakpoint_data = {false,0,false,0,false,0,false,0}; - - auto status = Control::config_context_switch_breakpoint(*device, breakpoint_id, - breakpoint_control, &breakpoint_data); - VALIDATE_STATUS(status); -} - -void ControlWrapper::clear_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id) -{ - CONTROL_PROTOCOL__context_switch_breakpoint_control_t breakpoint_control = - CONTROL_PROTOCOL__CONTEXT_SWITCH_BREAKPOINT_CONTROL_CLEAR; - CONTROL_PROTOCOL__context_switch_breakpoint_data_t breakpoint_data = {false,0,false,0,false,0,false,0}; - - auto status = Control::config_context_switch_breakpoint(*device, breakpoint_id, - breakpoint_control, &breakpoint_data); - VALIDATE_STATUS(status); -} - -uint8_t ControlWrapper::get_context_switch_breakpoint_status(DeviceWrapper &device, uint8_t breakpoint_id) -{ - CONTROL_PROTOCOL__context_switch_debug_sys_status_t breakpoint_status = - CONTROL_PROTOCOL__CONTEXT_SWITCH_DEBUG_SYS_STATUS_COUNT; - - auto status = Control::get_context_switch_breakpoint_status(*device, breakpoint_id, - &breakpoint_status); - VALIDATE_STATUS(status); - - return static_cast(breakpoint_status); -} - -void ControlWrapper::config_context_switch_timestamp(DeviceWrapper &device, uint16_t batch_index) -{ - auto status = Control::config_context_switch_timestamp(*device, batch_index, true); - VALIDATE_STATUS(status); -} - -void ControlWrapper::remove_context_switch_timestamp_configuration(DeviceWrapper 
&device) -{ - auto status = Control::config_context_switch_timestamp(*device, 0, false); - VALIDATE_STATUS(status); -} - -void ControlWrapper::enable_debugging(DeviceWrapper &device, bool is_rma) -{ - auto status = Control::enable_debugging(*device, is_rma); - VALIDATE_STATUS(status); -} - -void ControlWrapper::add_to_python_module(py::module &m) -{ - m.def("_set_clock_freq", &ControlWrapper::set_clock_freq); - m.def("close_all_streams", &ControlWrapper::close_all_streams); - m.def("config_ahb_to_axi", &ControlWrapper::config_ahb_to_axi); - m.def("phy_operation", &ControlWrapper::phy_operation); - m.def("latency_measurement_read", &ControlWrapper::latency_measurement_read); - m.def("latency_measurement_config", &ControlWrapper::latency_measurement_config); - m.def("start_firmware_update", &ControlWrapper::start_firmware_update); - m.def("finish_firmware_update", &ControlWrapper::finish_firmware_update); - m.def("write_firmware_update", &ControlWrapper::write_firmware_update); - m.def("validate_firmware_update", &ControlWrapper::validate_firmware_update); - m.def("sensor_get_config", &ControlWrapper::sensor_get_config); - m.def("idle_time_set_measurement", &ControlWrapper::idle_time_set_measurement); - m.def("idle_time_get_measurement", &ControlWrapper::idle_time_get_measurement); - m.def("d2h_notification_manager_set_host_info", &ControlWrapper::d2h_notification_manager_set_host_info); - m.def("d2h_notification_manager_send_host_info_notification", &ControlWrapper::d2h_notification_manager_send_host_info_notification); - m.def("set_context_switch_breakpoint", &set_context_switch_breakpoint); - m.def("continue_context_switch_breakpoint", &continue_context_switch_breakpoint); - m.def("clear_context_switch_breakpoint", &clear_context_switch_breakpoint); - m.def("get_context_switch_breakpoint_status", &get_context_switch_breakpoint_status); - m.def("config_context_switch_timestamp", &config_context_switch_timestamp); - m.def("remove_context_switch_timestamp_configuration", &remove_context_switch_timestamp_configuration); - m.def("enable_debugging", &enable_debugging); - - // TODO: HRT-5764 - Remove 'py::module_local()' when removing _pyhailort_internal from external - // py::module_local() is needed because these enums are currently in both _pyhailort and _pyhailort_internal, - // and when trying to import one of them on the python side you will get the error: - // ImportError: generic_type: type "enum_name" is already registered! - // py::module_local() tells pybind11 to keep the external class/enum binding localized to the module. 
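For reference, the Python-side failure that the removed comment describes looks like this (illustrative only; `_pyhailort_internal` is the extension module deleted by this change):

```python
# Illustrative only: because the duplicated enums were bound with py::module_local(),
# both extension modules could be imported side by side; without it, the second import
# raised "ImportError: generic_type: type '...' is already registered!".
import _pyhailort
import _pyhailort_internal
```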
- py::enum_(m, "ContextSwitchBreakpointStatus", py::module_local()) - .value("CONTEXT_SWITCH_BREAKPOINT_STATUS_CLEARED",CONTROL_PROTOCOL__CONTEXT_SWITCH_DEBUG_SYS_STATUS_CLEARED) - .value("CONTEXT_SWITCH_BREAKPOINT_STATUS_WAITING_FOR_BREAKPOINT",CONTROL_PROTOCOL__CONTEXT_SWITCH_DEBUG_SYS_STATUS_WAITING_FOR_BREAKPOINT) - .value("CONTEXT_SWITCH_BREAKPOINT_STATUS_REACHED_BREAKPOINT",CONTROL_PROTOCOL__CONTEXT_SWITCH_DEBUG_SYS_STATUS_REACHED_BREAKPOINT) - ; - - py::enum_(m, "CONTROL_PROTOCOL__phy_operation_t", py::module_local()) - .value("PHY_OPERATION_RESET", CONTROL_PROTOCOL__PHY_OPERATION_RESET) - ; - - py::enum_(m, "CONTROL_PROTOCOL__mipi_deskew_enable_t", py::module_local()) - .value("MIPI__DESKEW_FORCE_DISABLE", CONTROL_PROTOCOL__MIPI_DESKEW__FORCE_DISABLE) - .value("MIPI__DESKEW_FORCE_ENABLE", CONTROL_PROTOCOL__MIPI_DESKEW__FORCE_ENABLE) - .value("MIPI__DESKEW_DEFAULT", CONTROL_PROTOCOL__MIPI_DESKEW__DEFAULT) - ; - -} - -} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/python/src/internal/control_api.hpp b/hailort/libhailort/bindings/python/src/internal/control_api.hpp deleted file mode 100644 index ffcf26de..00000000 --- a/hailort/libhailort/bindings/python/src/internal/control_api.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file control_api.hpp - * @brief Defines binding to control functions - * - **/ - -#ifndef _CONTROL_API_HPP_ -#define _CONTROL_API_HPP_ - -#include "device_common/control.hpp" -#include "utils.hpp" - -#include "device_api.hpp" - -#include -#include -#include -#include -#include -#include -#include - -namespace hailort -{ - -class ControlWrapper { -public: - static void add_to_python_module(py::module &m); - - static void set_clock_freq(DeviceWrapper &device, uint32_t clock_freq); - static void close_all_streams(DeviceWrapper &device); - static void config_ahb_to_axi(DeviceWrapper &device, bool use_64bit_data_only); - static void phy_operation(DeviceWrapper &device, CONTROL_PROTOCOL__phy_operation_t operation_type); - static uint32_t latency_measurement_read(DeviceWrapper &device); - static void latency_measurement_config(DeviceWrapper &device, uint8_t latency_measurement_en, - uint32_t inbound_start_buffer_number, uint32_t outbound_stop_buffer_number, uint32_t inbound_stream_index, - uint32_t outbound_stream_index); - static void start_firmware_update(DeviceWrapper &device); - static void finish_firmware_update(DeviceWrapper &device); - static void write_firmware_update(DeviceWrapper &device, uint32_t offset, py::bytes data, uint32_t length); - static void validate_firmware_update(DeviceWrapper &device, py::bytes md5_raw_data, uint32_t firmware_size); - static py::bytes sensor_get_config(DeviceWrapper &device, uint32_t section_index, uint32_t offset, uint32_t data_length); - static void idle_time_set_measurement(DeviceWrapper &device, bool measurement_enable); - static uint64_t idle_time_get_measurement(DeviceWrapper &device); - static void d2h_notification_manager_set_host_info(DeviceWrapper &device, uint16_t host_port, uint32_t host_ip_address); - static void d2h_notification_manager_send_host_info_notification(DeviceWrapper &device, uint8_t notification_priority); - static void enable_debugging(DeviceWrapper &device, bool is_rma); - - /* Context switch */ - static void set_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id, - bool 
break_at_any_network_group_index, uint8_t network_group_index, - bool break_at_any_batch_index, uint16_t batch_index, - bool break_at_any_context_index,uint8_t context_index, - bool break_at_any_action_index, uint16_t action_index); - static void continue_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id); - static void clear_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id); - static uint8_t get_context_switch_breakpoint_status(DeviceWrapper &device, uint8_t breakpoint_id); - static void config_context_switch_timestamp(DeviceWrapper &device, uint16_t batch_index); - static void remove_context_switch_timestamp_configuration(DeviceWrapper &device); -}; - -} /* namespace hailort */ - -#endif /* _CONTROL_API_HPP_ */ diff --git a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp b/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp deleted file mode 100644 index c9d8e728..00000000 --- a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp +++ /dev/null @@ -1,405 +0,0 @@ - - -#include "hailo/hailort.h" - -#include "transform/transform_internal.hpp" -#include "bindings_common.hpp" - -#include "pyhailort_internal.hpp" -#include "control_api.hpp" -#include "utils.hpp" -#include "utils.h" - -#include -#include -#include -#include -#include -#include -#include - - -namespace hailort -{ -// TODO: Remove (HRT-9944) -// Duplicated for hailo post process test with python API. -static const uint32_t TEST_NUM_OF_CLASSES = 80; - - -Expected get_expected_buffer_float32() -{ - static const uint32_t DETECTION_CLASS_ID_1 = 0; - static const float32_t CLASS_ID_1_DETECTION_COUNT = 5; - static const uint32_t DETECTION_CLASS_ID_3 = 2; - static const float32_t CLASS_ID_3_DETECTION_COUNT = 2; - static const uint32_t DETECTION_CLASS_ID_8 = 7; - static const float32_t CLASS_ID_8_DETECTION_COUNT = 1; - static const uint32_t DETECTION_CLASS_ID_26 = 25; - static const float32_t CLASS_ID_26_DETECTION_COUNT = 1; - - static const hailo_bbox_float32_t bbox1_0 = { - /*.y_min =*/ 0.5427529811859131f, - /*.x_min =*/ 0.2485126256942749f, - /*.y_max =*/ 0.6096446067f, - /*.x_max =*/ 0.27035075984f, - /*.score =*/ 0.7761699557304382f, - }; - - static const hailo_bbox_float32_t bbox1_1 = { - /*.y_min =*/ 0.5454554557800293f, - /*.x_min =*/ 0.33257606625556948f, - /*.y_max =*/ 0.7027952075f, - /*.x_max =*/ 0.40901548415f, - /*.score =*/ 0.7637669444084168f, - }; - - static const hailo_bbox_float32_t bbox1_2 = { - /*.y_min =*/ 0.5521867275238037f, - /*.x_min =*/ 0.19988654553890229f, - /*.y_max =*/ 0.60256312787f, - /*.x_max =*/ 0.21917282976f, - /*.score =*/ 0.7451231479644775f, - }; - - static const hailo_bbox_float32_t bbox1_3 = { - /*.y_min =*/ 0.5514537692070007f, - /*.x_min =*/ 0.2693796157836914f, - /*.y_max =*/ 0.60397491604f, - /*.x_max =*/ 0.28537025302f, - /*.score =*/ 0.3756354749202728f, - }; - - static const hailo_bbox_float32_t bbox1_4 = { - /*.y_min =*/ 0.553998589515686f, - /*.x_min =*/ 0.18612079322338105f, - /*.y_max =*/ 0.58339602686f, - /*.x_max =*/ 0.2008818537f, - /*.score =*/ 0.3166312277317047f, - }; - - static const hailo_bbox_float32_t bbox3_0 = { - /*.y_min =*/ 0.5026738047599793f, - /*.x_min =*/ -0.005611047148704529f, - /*.y_max =*/ 0.65071095526f, - /*.x_max =*/ 0.13888412714f, - /*.score =*/ 0.5734351277351379f, - }; - - static const hailo_bbox_float32_t bbox3_1 = { - /*.y_min =*/ 0.5620155334472656f, - /*.x_min =*/ 0.16757474839687348f, - /*.y_max =*/ 0.58410947769f, - /*.x_max =*/ 0.19325175508f, - 
/*.score =*/ 0.4062519371509552f, - }; - - static const hailo_bbox_float32_t bbox8_0 = { - /*.y_min =*/ 0.5028372406959534f, - /*.x_min =*/ -0.0017736181616783143f, - /*.y_max =*/ 0.65114967525f, - /*.x_max =*/ 0.13592261821f, - /*.score =*/ 0.4223918318748474f, - }; - - static const hailo_bbox_float32_t bbox26_0 = { - /*.y_min =*/ 0.5854946374893189f, - /*.x_min =*/ 0.2693060040473938f, - /*.y_max =*/ 0.68259389698f, - /*.x_max =*/ 0.38090330362f, - /*.score =*/ 0.6338639259338379f, - }; - - static const uint32_t DETECTION_COUNT = 9; - auto buffer_size = (DETECTION_COUNT * sizeof(hailo_bbox_float32_t)) + (TEST_NUM_OF_CLASSES * sizeof(float32_t)); - auto buffer_expected = Buffer::create(buffer_size, 0); - CHECK_EXPECTED(buffer_expected); - auto buffer = buffer_expected.release(); - - size_t offset = 0; - for (uint32_t class_index = 0; class_index < TEST_NUM_OF_CLASSES; class_index++) { - if (DETECTION_CLASS_ID_1 == class_index) { - memcpy(buffer.data() + offset, &CLASS_ID_1_DETECTION_COUNT, sizeof(CLASS_ID_1_DETECTION_COUNT)); - offset += sizeof(CLASS_ID_1_DETECTION_COUNT); - - memcpy(buffer.data() + offset, &bbox1_0, sizeof(bbox1_0)); - offset += sizeof(bbox1_0); - - memcpy(buffer.data() + offset, &bbox1_1, sizeof(bbox1_1)); - offset += sizeof(bbox1_1); - - memcpy(buffer.data() + offset, &bbox1_2, sizeof(bbox1_2)); - offset += sizeof(bbox1_2); - - memcpy(buffer.data() + offset, &bbox1_3, sizeof(bbox1_3)); - offset += sizeof(bbox1_3); - - memcpy(buffer.data() + offset, &bbox1_4, sizeof(bbox1_4)); - offset += sizeof(bbox1_4); - } - else if (DETECTION_CLASS_ID_3 == class_index) { - memcpy(buffer.data() + offset, &CLASS_ID_3_DETECTION_COUNT, sizeof(CLASS_ID_3_DETECTION_COUNT)); - offset += sizeof(CLASS_ID_3_DETECTION_COUNT); - - memcpy(buffer.data() + offset, &bbox3_0, sizeof(bbox3_0)); - offset += sizeof(bbox3_0); - - memcpy(buffer.data() + offset, &bbox3_1, sizeof(bbox3_1)); - offset += sizeof(bbox3_1); - } - else if (DETECTION_CLASS_ID_8 == class_index) { - memcpy(buffer.data() + offset, &CLASS_ID_8_DETECTION_COUNT, sizeof(CLASS_ID_8_DETECTION_COUNT)); - offset += sizeof(CLASS_ID_8_DETECTION_COUNT); - - memcpy(buffer.data() + offset, &bbox8_0, sizeof(bbox8_0)); - offset += sizeof(bbox8_0); - } - else if (DETECTION_CLASS_ID_26 == class_index) { - memcpy(buffer.data() + offset, &CLASS_ID_26_DETECTION_COUNT, sizeof(CLASS_ID_26_DETECTION_COUNT)); - offset += sizeof(CLASS_ID_26_DETECTION_COUNT); - - memcpy(buffer.data() + offset, &bbox26_0, sizeof(bbox26_0)); - offset += sizeof(bbox26_0); - } - else { - offset += sizeof(float32_t); - } - } - - return buffer; -} - -py::array PyhailortInternal::get_yolov5_post_process_expected_buffer() -{ - auto buffer = get_expected_buffer_float32(); - VALIDATE_EXPECTED(buffer); - - auto type = py::dtype(HailoRTBindingsCommon::convert_format_type_to_string(HAILO_FORMAT_TYPE_FLOAT32)); - auto shape = *py::array::ShapeContainer({buffer->size()}); - - // Note: The ownership of the buffer is transferred to Python wrapped as a py::array. 
- // When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor - // is called too (and it deletes the raw buffer) - auto unmanaged_addr_exp = buffer->storage().release(); - VALIDATE_EXPECTED(unmanaged_addr_exp); - const auto unmanaged_addr = unmanaged_addr_exp.release(); - return py::array(type, shape, unmanaged_addr, - py::capsule(unmanaged_addr, [](void *p) { delete reinterpret_cast(p); })); -} - -void PyhailortInternal::demux_output_buffer( - py::bytes src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - std::map dst_buffers, const LayerInfo &mux_layer_info) -{ - const size_t hw_frame_size = HailoRTCommon::get_frame_size(src_shape, src_format); - auto expected_output_demuxer = OutputDemuxerBase::create(hw_frame_size, mux_layer_info); - VALIDATE_EXPECTED(expected_output_demuxer); - - auto demuxer = expected_output_demuxer.release(); - - std::map dst_ptrs; - for (auto &dst_buffer_pair : dst_buffers) { - dst_ptrs.insert(std::make_pair(dst_buffer_pair.first, - MemoryView(reinterpret_cast(dst_buffer_pair.second.mutable_data()), - dst_buffer_pair.second.nbytes()))); - } - - const auto src_str = static_cast(src); - auto status = demuxer.transform_demux( - MemoryView(const_cast(reinterpret_cast(src_str.c_str())), src_str.length()), dst_ptrs); - VALIDATE_STATUS(status); -} - -void PyhailortInternal::transform_input_buffer( - py::array src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - uintptr_t dst, size_t dst_size, const hailo_format_t &dst_format, const hailo_3d_image_shape_t &dst_shape, - const std::vector &dst_quant_infos) -{ - auto transform_context = InputTransformContext::create(src_shape, src_format, dst_shape, dst_format, - dst_quant_infos); - VALIDATE_EXPECTED(transform_context); - - MemoryView dst_buffer(reinterpret_cast(dst), dst_size); - auto status = transform_context.value()->transform( - MemoryView::create_const(const_cast(reinterpret_cast(src.data())), src.nbytes()), - dst_buffer); - VALIDATE_STATUS(status); -} - -void PyhailortInternal::transform_output_buffer( - py::bytes src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - py::array dst, const hailo_format_t &dst_format, const hailo_3d_image_shape_t &dst_shape, - const std::vector &dst_quant_infos) -{ - auto transform_context = OutputTransformContext::create(src_shape, src_format, dst_shape, dst_format, - dst_quant_infos, {}); - VALIDATE_EXPECTED(transform_context); - - const auto src_str = static_cast(src); - MemoryView dst_buffer(reinterpret_cast(dst.mutable_data()), dst.nbytes()); - auto status = transform_context.value()->transform(MemoryView::create_const(src_str.c_str(), - src_str.length()), dst_buffer); - VALIDATE_STATUS(status); -} - -void PyhailortInternal::transform_output_buffer_nms( - py::bytes src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - py::array dst, const hailo_format_t &dst_format, const hailo_3d_image_shape_t &dst_shape, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info) -{ - auto transform_context = OutputTransformContext::create(src_shape, src_format, dst_shape, dst_format, - dst_quant_infos, nms_info); - VALIDATE_EXPECTED(transform_context); - - const auto src_str = static_cast(src); - MemoryView dst_buffer(reinterpret_cast(dst.mutable_data()), dst.nbytes()); - auto status = transform_context.value()->transform(MemoryView::create_const(src_str.c_str(), - src_str.size()), dst_buffer); - VALIDATE_STATUS(status); -} - -bool 
PyhailortInternal::is_input_transformation_required( - const hailo_3d_image_shape_t &src_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_shape, const hailo_format_t &dst_format, - const std::vector &quant_infos) -{ - auto expected_is_transforamtion_required = InputTransformContext::is_transformation_required(src_shape, src_format, dst_shape, dst_format, - quant_infos); - VALIDATE_EXPECTED(expected_is_transforamtion_required); - - return expected_is_transforamtion_required.release(); -} - -bool PyhailortInternal::is_output_transformation_required( - const hailo_3d_image_shape_t &src_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_shape, const hailo_format_t &dst_format, - const std::vector &quant_infos) -{ - auto expected_is_transforamtion_required = OutputTransformContext::is_transformation_required(src_shape, src_format, dst_shape, dst_format, - quant_infos); - VALIDATE_EXPECTED(expected_is_transforamtion_required); - - return expected_is_transforamtion_required.release(); -} - -py::list PyhailortInternal::get_all_layers_info(const HefWrapper &hef, const std::string &net_group_name) -{ - auto core_op_metadata = hef.hef_ptr()->pimpl->get_core_op_metadata(net_group_name); - VALIDATE_EXPECTED(core_op_metadata); - - return py::cast(core_op_metadata.value()->get_all_layer_infos()); -} - -PYBIND11_MODULE(_pyhailort_internal, m) { - ControlWrapper::add_to_python_module(m); - m.def("get_yolov5_post_process_expected_buffer", &PyhailortInternal::get_yolov5_post_process_expected_buffer); - m.def("demux_output_buffer", &PyhailortInternal::demux_output_buffer); - m.def("transform_input_buffer", &PyhailortInternal::transform_input_buffer); - m.def("transform_output_buffer", &PyhailortInternal::transform_output_buffer); - m.def("transform_output_buffer_nms", &PyhailortInternal::transform_output_buffer_nms); - m.def("is_input_transformation_required", &PyhailortInternal::is_input_transformation_required); - m.def("is_output_transformation_required", &PyhailortInternal::is_output_transformation_required); - m.def("get_all_layers_info", &PyhailortInternal::get_all_layers_info); - - py::class_(m, "BufferIndices", py::module_local()) - .def_readonly("index", &BufferIndices::index) - .def_readonly("cluster_index", &BufferIndices::cluster_index) - ; - - py::enum_(m, "SensorConfigOpCode") - .value("SENSOR_CONFIG_OPCODES_WR", SENSOR_CONFIG_OPCODES_WR) - .value("SENSOR_CONFIG_OPCODES_RD", SENSOR_CONFIG_OPCODES_RD) - .value("SENSOR_CONFIG_OPCODES_RMW", SENSOR_CONFIG_OPCODES_RMW) - .value("SENSOR_CONFIG_OPCODES_DELAY", SENSOR_CONFIG_OPCODES_DELAY) - ; - - py::class_(m, "HailoLayerInfo", py::module_local()) - .def_readonly("is_mux", &LayerInfo::is_mux) - .def_readonly("mux_predecessors", &LayerInfo::predecessor) - .def_readonly("is_multi_planar", &LayerInfo::is_multi_planar) - .def_readonly("planes", &LayerInfo::planes) - .def_readonly("plane_index", &LayerInfo::plane_index) - .def_readonly("is_defused_nms", &LayerInfo::is_defused_nms) - .def_readonly("fused_nms_layer", &LayerInfo::fused_nms_layer) - .def_property_readonly("shape", [](LayerInfo& self) - { - switch (self.format.order) { - case HAILO_FORMAT_ORDER_NC: - return py::make_tuple(self.shape.features); - case HAILO_FORMAT_ORDER_NHW: - return py::make_tuple(self.shape.height, self.shape.width); - default: - return py::make_tuple(self.shape.height, self.shape.width, self.shape.features); - } - }) - .def_property_readonly("height", [](LayerInfo& self) - { - return self.shape.height; - }) - 
.def_property_readonly("width", [](LayerInfo& self) - { - return self.shape.width; - }) - .def_property_readonly("features", [](LayerInfo& self) - { - return self.shape.features; - }) - .def("hw_shape", [](LayerInfo& self) - { - return py::make_tuple(self.hw_shape.height, self.hw_shape.width, self.hw_shape.features); - }) - .def_property_readonly("padded_height", [](LayerInfo& self) - { - return self.hw_shape.height; - }) - .def_property_readonly("padded_width", [](LayerInfo& self) - { - return self.hw_shape.width; - }) - .def_property_readonly("padded_features", [](LayerInfo& self) - { - return self.hw_shape.features; - }) - .def_readonly("data_bytes", &LayerInfo::hw_data_bytes) - .def_readonly("format", &LayerInfo::format) - .def_property_readonly("format_order", [](LayerInfo& self) - { - return self.format.order; - }) - .def_readonly("direction", &LayerInfo::direction) - .def_readonly("sys_index", &LayerInfo::stream_index) - .def_readonly("name", &LayerInfo::name) - .def_readonly("quant_infos", &LayerInfo::quant_infos) - // For backwards compatibility (accessing qp through layer_info directly) - .def_property_readonly("qp_zp", [](LayerInfo& self) - { - return self.quant_info.qp_zp; - }) - .def_property_readonly("qp_scale", [](LayerInfo& self) - { - return self.quant_info.qp_scale; - }) - .def_property_readonly("limvals_min", [](LayerInfo& self) - { - return self.quant_info.limvals_min; - }) - .def_property_readonly("limvals_max", [](LayerInfo& self) - { - return self.quant_info.limvals_max; - }) - .def_readonly("nms_info", &LayerInfo::nms_info) - .def_readonly("height_gcd", &LayerInfo::height_gcd) - .def_readonly("height_ratios", &LayerInfo::height_ratios) - .def_readonly("buffer_indices", &LayerInfo::buffer_indices) - .def_property_readonly("core_bytes_per_buffer", [](LayerInfo& self) - { - return self.nn_stream_config.core_bytes_per_buffer; - }) - .def_property_readonly("core_buffers_per_frame", [](LayerInfo& self) - { - return self.nn_stream_config.core_buffers_per_frame; - }) - .def_readonly("network_name", &LayerInfo::network_name) - ; -} - -} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.hpp b/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.hpp deleted file mode 100644 index bb6f0de7..00000000 --- a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file pyhailort_internal.hpp - * @brief Defines binding of internal functions over Python. 
- **/ - -#ifndef _PYHAILORT_INTERNAL_ -#define _PYHAILORT_INTERNAL_ - -#include "hef/hef_internal.hpp" - -#include "hef_api.hpp" -#include "utils.hpp" -#include "utils.h" -#include -#include -#include -#include -#include -#include -#include - - -namespace hailort -{ - -class PyhailortInternal { -public: - static py::array get_yolov5_post_process_expected_buffer(); - static void demux_output_buffer(py::bytes src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - std::map dst_buffers, const LayerInfo &mux_layer_info); - static void transform_input_buffer(py::array src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - uintptr_t dst, size_t dst_size, const hailo_format_t &dst_format, const hailo_3d_image_shape_t &dst_shape, - const std::vector &dst_quant_infos); - static void transform_output_buffer(py::bytes src, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &src_shape, py::array dst, const hailo_format_t &dst_format, - const hailo_3d_image_shape_t &dst_shape, const std::vector &dst_quant_infos); - static void transform_output_buffer_nms(py::bytes src, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &src_shape, py::array dst, const hailo_format_t &dst_format, - const hailo_3d_image_shape_t &dst_shape, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info); - static bool is_input_transformation_required(const hailo_3d_image_shape_t &src_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_shape, const hailo_format_t &dst_format, const std::vector &quant_infos); - static bool is_output_transformation_required(const hailo_3d_image_shape_t &src_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_shape, const hailo_format_t &dst_format, const std::vector &quant_infos); - static py::list get_all_layers_info(const HefWrapper &hef, const std::string &net_group_name); -}; - -} /* namespace hailort */ - -#endif /* _PYHAILORT_INTERNAL_ */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/python/src/network_group_api.cpp b/hailort/libhailort/bindings/python/src/network_group_api.cpp index 925f5f2c..db13dd46 100644 --- a/hailort/libhailort/bindings/python/src/network_group_api.cpp +++ b/hailort/libhailort/bindings/python/src/network_group_api.cpp @@ -71,7 +71,7 @@ void ActivatedAppContextManagerWrapper::add_to_python_module(py::module &m) ; py::class_(m, "ActivatedNetworkGroup") - .def("get_intermediate_buffer", [](ActivatedNetworkGroup& self, uint8_t src_context_index, + .def("get_intermediate_buffer", [](ActivatedNetworkGroup& self, uint16_t src_context_index, uint8_t src_stream_index) { auto buff = self.get_intermediate_buffer(std::make_pair(src_context_index, src_stream_index)); diff --git a/hailort/libhailort/bindings/python/src/pyhailort.cpp b/hailort/libhailort/bindings/python/src/pyhailort.cpp index 5d9d4670..5b0a8c54 100644 --- a/hailort/libhailort/bindings/python/src/pyhailort.cpp +++ b/hailort/libhailort/bindings/python/src/pyhailort.cpp @@ -501,19 +501,19 @@ PYBIND11_MODULE(_pyhailort, m) { .def(py::init<>()) .def_readonly("number_of_classes", &hailo_nms_shape_t::number_of_classes) .def_readonly("max_bboxes_per_class", &hailo_nms_shape_t::max_bboxes_per_class) - .def_readonly("max_mask_size", &hailo_nms_shape_t::max_mask_size) + .def_readonly("max_accumulated_mask_size", &hailo_nms_shape_t::max_accumulated_mask_size) .def(py::pickle( [](const hailo_nms_shape_t &nms_shape) { // __getstate__ return py::make_tuple( 
nms_shape.number_of_classes,
                 nms_shape.max_bboxes_per_class,
-                nms_shape.max_mask_size);
+                nms_shape.max_accumulated_mask_size);
         },
         [](py::tuple t) { // __setstate__
             hailo_nms_shape_t nms_shape;
             nms_shape.number_of_classes = t[0].cast<uint32_t>();
             nms_shape.max_bboxes_per_class = t[1].cast<uint32_t>();
-            nms_shape.max_mask_size = t[2].cast<uint32_t>();
+            nms_shape.max_accumulated_mask_size = t[2].cast<uint32_t>();
             return nms_shape;
         }
     ))
diff --git a/hailort/libhailort/bindings/python/src/vstream_api.cpp b/hailort/libhailort/bindings/python/src/vstream_api.cpp
index 1bf05780..82dbb54a 100644
--- a/hailort/libhailort/bindings/python/src/vstream_api.cpp
+++ b/hailort/libhailort/bindings/python/src/vstream_api.cpp
@@ -155,7 +155,7 @@ void OutputVStreamWrapper::add_to_python_module(py::module &m)
         // Note: The ownership of the buffer is transferred to Python wrapped as a py::array.
         // When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor
         // is called too (and it deletes the raw buffer)
-        auto unmanaged_addr_exp = buffer->storage().release();
+        auto unmanaged_addr_exp = buffer->release();
         VALIDATE_EXPECTED(unmanaged_addr_exp);
         const auto unmanaged_addr = unmanaged_addr_exp.release();
         return py::array(get_dtype(self), get_shape(self), unmanaged_addr,
@@ -176,6 +176,11 @@
         hailo_status status = self.set_nms_max_proposals_per_class(max_proposals_per_class);
         VALIDATE_STATUS(status);
     })
+    .def("set_nms_max_accumulated_mask_size", [](OutputVStream &self, uint32_t max_accumulated_mask_size)
+    {
+        hailo_status status = self.set_nms_max_accumulated_mask_size(max_accumulated_mask_size);
+        VALIDATE_STATUS(status);
+    })
     .def_property_readonly("info", [](OutputVStream &self)
     {
         return self.get_info();
@@ -403,6 +408,10 @@ void InferVStreamsWrapper::add_to_python_module(py::module &m)
     {
         VALIDATE_STATUS(self.m_infer_pipeline->set_nms_max_proposals_per_class(max_proposals_per_class));
     })
+    .def("set_nms_max_accumulated_mask_size", [](InferVStreamsWrapper &self, uint32_t max_accumulated_mask_size)
+    {
+        VALIDATE_STATUS(self.m_infer_pipeline->set_nms_max_accumulated_mask_size(max_accumulated_mask_size));
+    })
     ;
 }
diff --git a/hailort/libhailort/examples/README.md b/hailort/libhailort/examples/README.md
index 47cc9a44..b4b19f11 100644
--- a/hailort/libhailort/examples/README.md
+++ b/hailort/libhailort/examples/README.md
@@ -57,8 +57,8 @@ The following examples are provided, demonstrating the HailoRT API:
     - For Windows, in case of restricted execution policy, either change the policy, or run the script with "PowerShell -NoProfile -ExecutionPolicy Bypass -File <path-to-script>"
   - `notification_callback_example` - Demonstrates how to work with notification callbacks, same as `notification_callback_example` C example. You can find more details about each example in the HailoRT user guide.
-  - `async_infer_example` - Basic asynchronous inference of a shortcut network, uses HailoRT C++ api.
-  - `async_infer_functionality_example` - More advanced asynchronous inference of a multiple input and output model, uses HailoRT C++ api.
+  - `async_infer_basic_example` - Basic asynchronous inference of a model with multiple inputs and outputs, using the HailoRT C++ API.
+  - `async_infer_advanced_example` - More advanced asynchronous inference of a multi-planar model, using the HailoRT C++ API.
 ## Compiling with CMake
 Examples are configured and compiled using the following commands:
 ```sh
@@ -87,3 +87,11 @@
 To run an example, use (from this examples directory):
 ```sh
 build/<c/cpp>/<example_name>/<example_name> [params..]
```
+
+## Hailo Application Code Examples
+
+The examples on this page demonstrate HailoRT API usage.
+
+Hailo also offers an additional set of
+[Application Code Examples](https://github.com/hailo-ai/Hailo-Application-Code-Examples),
+which are more application-oriented.
\ No newline at end of file
diff --git a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
index 6a2e675b..ded5ebce 100644
--- a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C)
diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
index 1fd6b7aa..acc59088 100644
--- a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C)
diff --git a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
index 5ed386f1..f191cc8f 100644
--- a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C)
diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
index bea6c1cc..d894e5bc 100644
--- a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 find_package(Threads REQUIRED)
 set(THREADS_PREFER_PTHREAD_FLAG ON)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C)
diff --git a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
index fe145e20..f659f135 100644
--- a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C)
diff --git
a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt index 76d85fd1..16a7faa3 100644 --- a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt index 8477fc8a..305c47a6 100644 --- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c index 1fa7838c..d4ce56e7 100644 --- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c @@ -50,11 +50,11 @@ static void output_done_callback(const hailo_stream_read_async_completion_info_t // Real applications can forward the buffer to post-process/display. Here we just re-launch new async reads. status = hailo_stream_read_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, output_done_callback, stream); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORT != status)) { fprintf(stderr, "Failed read async with status=%d\n", status); } break; - case HAILO_STREAM_ABORTED_BY_USER: + case HAILO_STREAM_ABORT: // Transfer was canceled, finish gracefully. break; default: @@ -73,11 +73,11 @@ static void input_done_callback(const hailo_stream_write_async_completion_info_t // new async writes. status = hailo_stream_write_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, input_done_callback, stream); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORT != status)) { fprintf(stderr, "Failed write async with status=%d\n", status); } break; - case HAILO_STREAM_ABORTED_BY_USER: + case HAILO_STREAM_ABORT: // Transfer was canceled, finish gracefully. 
break; default: @@ -85,7 +85,13 @@ static void input_done_callback(const hailo_stream_write_async_completion_info_t } } -static hailo_status infer(hailo_configured_network_group network_group, size_t number_input_streams, +typedef struct { + void *addr; + size_t size; + hailo_dma_buffer_direction_t direction; +} allocated_buffer_t; + +static hailo_status infer(hailo_device device, hailo_configured_network_group network_group, size_t number_input_streams, hailo_input_stream *input_streams, size_t number_output_streams, hailo_output_stream *output_streams, size_t ongoing_transfers) { @@ -95,7 +101,8 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n size_t frame_size = 0; size_t stream_index = 0; void *current_buffer = NULL; - void *buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0}; + + allocated_buffer_t buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0}; size_t allocated_buffers = 0; // We launch "ongoing_transfers" async operations for both input and output streams. On each async callback, we launch @@ -108,7 +115,12 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n // Buffers read from async operation must be page aligned. current_buffer = page_aligned_alloc(frame_size); REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_shutdown, "allocation failed"); - buffers[allocated_buffers++] = current_buffer; + buffers[allocated_buffers++] = (allocated_buffer_t){ current_buffer, frame_size, HAILO_DMA_BUFFER_DIRECTION_D2H }; + + // If the same buffer is used multiple times on async-io, to improve performance, it is recommended to + // pre-map it into the device. + status = hailo_device_dma_map_buffer(device, current_buffer, frame_size, HAILO_DMA_BUFFER_DIRECTION_D2H); + REQUIRE_SUCCESS(status, l_shutdown, "Failed map buffer with status=%d", status); status = hailo_stream_read_raw_buffer_async(output_streams[stream_index], current_buffer, frame_size, output_done_callback, output_streams[stream_index]); @@ -124,7 +136,12 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n // Buffers written to async operation must be page aligned. current_buffer = page_aligned_alloc(frame_size); REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_shutdown, "allocation failed"); - buffers[allocated_buffers++] = current_buffer; + buffers[allocated_buffers++] = (allocated_buffer_t){ current_buffer, frame_size, HAILO_DMA_BUFFER_DIRECTION_H2D }; + + // If the same buffer is used multiple times on async-io, to improve performance, it is recommended to + // pre-map it into the device. + status = hailo_device_dma_map_buffer(device, current_buffer, frame_size, HAILO_DMA_BUFFER_DIRECTION_H2D); + REQUIRE_SUCCESS(status, l_shutdown, "Failed map buffer with status=%d", status); status = hailo_stream_write_raw_buffer_async(input_streams[stream_index], current_buffer, frame_size, input_done_callback, input_streams[stream_index]); @@ -138,11 +155,14 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n status = HAILO_SUCCESS; l_shutdown: // Calling hailo_shutdown_network_group will ensure that all async operations are done. All pending async I/O - // operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORTED_BY_USER. + // operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORT. 
(void) hailo_shutdown_network_group(network_group); // There are no async I/O operations ongoing so it is safe to free the buffers now. - for (i = 0; i < allocated_buffers; i++) page_aligned_free(buffers[i], frame_size); + for (i = 0; i < allocated_buffers; i++) { + (void) hailo_device_dma_unmap_buffer(device, buffers[i].addr, buffers[i].size, buffers[i].direction); + page_aligned_free(buffers[i].addr, buffers[i].size); + } return status; } @@ -239,7 +259,7 @@ int main() REQUIRE_SUCCESS(status, l_release_device, "Failed activate network group"); // Run infer. - status = infer(network_group, number_input_streams, input_streams, number_output_streams, output_streams, + status = infer(device, network_group, number_input_streams, input_streams, number_output_streams, output_streams, ongoing_transfers); REQUIRE_SUCCESS(status, l_deactivate, "Failed performing inference"); diff --git a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt index 2d4245eb..31ce2faf 100644 --- a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt index e71deee9..466b8e3d 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c index 8b290202..9a84f3f1 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c +++ b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c @@ -192,7 +192,7 @@ int main() write_thread_args_t write_args[HEF_COUNT][MAX_EDGE_LAYERS]; read_thread_args_t read_args[HEF_COUNT][MAX_EDGE_LAYERS]; - char HEF_FILES[HEF_COUNT][MAX_HEF_PATH_LEN] = {"hefs/multi_network_shortcut_net.hef", "hefs/shortcut_net.hef"}; + char HEF_FILES[HEF_COUNT][MAX_HEF_PATH_LEN] = {"hefs/shortcut_net_nv12.hef", "hefs/shortcut_net.hef"}; // Note: default batch_size is 0, which is not used in this example uint16_t batch_sizes[HEF_COUNT] = {BATCH_SIZE_1, BATCH_SIZE_2}; diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt index aab5aec0..0fab3bb5 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) 
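The C example above pre-maps each buffer with `hailo_device_dma_map_buffer` before reusing it across async transfers, and unmaps only after `hailo_shutdown_network_group` has canceled all pending I/O. Below is a minimal sketch of that lifecycle for a single input buffer; it is a hedged fragment rather than a full program, and it assumes `device`, `input_stream`, `frame_size` and `input_done_callback` are set up as in the full example:

```
// Sketch: map once, reuse across async writes, unmap only after shutdown.
void *buffer = page_aligned_alloc(frame_size);
if (INVALID_ADDR == buffer) {
    return HAILO_OUT_OF_HOST_MEMORY;
}

// Mapping up front means repeated transfers on this buffer skip the
// per-transfer mapping cost.
hailo_status status = hailo_device_dma_map_buffer(device, buffer, frame_size,
    HAILO_DMA_BUFFER_DIRECTION_H2D);
if (HAILO_SUCCESS != status) {
    page_aligned_free(buffer, frame_size);
    return status;
}

// The completion callback re-launches the next write on the same
// (already mapped) buffer, as in the example above.
status = hailo_stream_write_raw_buffer_async(input_stream, buffer, frame_size,
    input_done_callback, input_stream);

// ... later, once hailo_shutdown_network_group() guarantees no transfer is
// in flight, it is safe to unmap and free:
(void) hailo_device_dma_unmap_buffer(device, buffer, frame_size,
    HAILO_DMA_BUFFER_DIRECTION_H2D);
page_aligned_free(buffer, frame_size);
```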
find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt index 0ef434b9..b6657446 100644 --- a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/cpp/CMakeLists.txt b/hailort/libhailort/examples/cpp/CMakeLists.txt index a39e2867..c0b31e4b 100644 --- a/hailort/libhailort/examples/cpp/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.0.0) add_subdirectory(vstreams_example) add_subdirectory(infer_pipeline_example) -add_subdirectory(async_infer_example) -add_subdirectory(async_infer_functionality_example) +add_subdirectory(async_infer_basic_example) +add_subdirectory(async_infer_advanced_example) add_subdirectory(raw_streams_example) add_subdirectory(multi_network_vstream_example) add_subdirectory(switch_network_groups_example) @@ -17,8 +17,8 @@ add_subdirectory(notification_callback_example) set(CPP_EXAMPLE_TARGETS cpp_vstreams_example cpp_infer_pipeline_example - cpp_async_infer_example - cpp_async_infer_functionality_example + cpp_async_infer_basic_example + cpp_async_infer_advanced_example cpp_raw_streams_example cpp_multi_network_vstream_example cpp_switch_network_groups_example @@ -39,4 +39,4 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL QNX) endif() add_custom_target(cpp_hailort_examples) -add_dependencies(cpp_hailort_examples ${CPP_EXAMPLE_TARGETS}) \ No newline at end of file +add_dependencies(cpp_hailort_examples ${CPP_EXAMPLE_TARGETS}) diff --git a/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt new file mode 100644 index 00000000..4b7789a8 --- /dev/null +++ b/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(HailoRT 4.17.0 EXACT REQUIRED) + +add_executable(cpp_async_infer_advanced_example async_infer_advanced_example.cpp) +target_link_libraries(cpp_async_infer_advanced_example PRIVATE HailoRT::libhailort) + +if(WIN32) + target_compile_options(cpp_async_infer_advanced_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines its own) + /wd4201 /wd4251 + ) +endif() + +set_target_properties(cpp_async_infer_advanced_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp b/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp similarity index 69% rename from hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp rename to hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp index e2a5e228..b5ed3df4 100644 ---
a/hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp +++ b/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp @@ -3,9 +3,10 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file async_infer_functionality_example.cpp - * This example demonstrates the Async Infer API usage with a specific model with multiple inputs and outputs + * @file async_infer_advanced_example.cpp + * This example demonstrates the Async Infer API usage with a specific model that has multi-planar input * and changes configurations of the streams. + * Multiple infer jobs are triggered, and waiting for the last one ensures that all the rest have completed as well. **/ #include "hailo/hailort.hpp" @@ -43,46 +44,67 @@ int main() return vdevice.status(); } - auto infer_model_exp = vdevice.value()->create_infer_model("hefs/multi_network_shortcut_net.hef"); + // Create infer model from HEF file. + auto infer_model_exp = vdevice.value()->create_infer_model("hefs/shortcut_net_nv12.hef"); if (!infer_model_exp) { std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl; return infer_model_exp.status(); } auto infer_model = infer_model_exp.release(); - infer_model->input("multi_network_shortcut_net_scope1/input_layer_0")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); - infer_model->output("multi_network_shortcut_net_scope1/shortcut0")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); - infer_model->input("multi_network_shortcut_net_scope2/input_layer_1")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); - infer_model->output("multi_network_shortcut_net_scope2/shortcut1")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); + infer_model->output()->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); + // Configure the infer model auto configured_infer_model = infer_model->configure(); if (!configured_infer_model) { std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl; return configured_infer_model.status(); } - // We store buffers vector here as a guard for the memory. The buffer will be freed only after + // The buffers are stored here as a guard for the memory. The buffer will be freed only after // configured_infer_model is released. std::vector<std::shared_ptr<uint8_t>> buffer_guards; + // Create infer bindings auto bindings = configured_infer_model->create_bindings(); if (!bindings) { std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl; return bindings.status(); } + // Set the input buffers of the bindings.
for (const auto &input_name : infer_model->get_input_names()) { size_t input_frame_size = infer_model->input(input_name)->get_frame_size(); - auto input_buffer = page_aligned_alloc(input_frame_size); - auto status = bindings->input(input_name)->set_buffer(MemoryView(input_buffer.get(), input_frame_size)); + + // Create the pix_buffer (NV12: a Y plane followed by an interleaved UV plane) + const auto Y_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 2 / 3); + const auto UV_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 1 / 3); + assert(Y_PLANE_SIZE + UV_PLANE_SIZE == input_frame_size); + auto y_plane_buffer = page_aligned_alloc(Y_PLANE_SIZE); + auto uv_plane_buffer = page_aligned_alloc(UV_PLANE_SIZE); + hailo_pix_buffer_t pix_buffer{}; + pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; + pix_buffer.number_of_planes = 2; + // Y Plane + pix_buffer.planes[0].bytes_used = Y_PLANE_SIZE; + pix_buffer.planes[0].plane_size = Y_PLANE_SIZE; + pix_buffer.planes[0].user_ptr = reinterpret_cast<void*>(y_plane_buffer.get()); + // UV Plane + pix_buffer.planes[1].bytes_used = UV_PLANE_SIZE; + pix_buffer.planes[1].plane_size = UV_PLANE_SIZE; + pix_buffer.planes[1].user_ptr = reinterpret_cast<void*>(uv_plane_buffer.get()); + + auto status = bindings->input(input_name)->set_pix_buffer(pix_buffer); if (HAILO_SUCCESS != status) { std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; return status; } - buffer_guards.push_back(input_buffer); + buffer_guards.push_back(y_plane_buffer); + buffer_guards.push_back(uv_plane_buffer); } + // Set the output buffers of the bindings. for (const auto &output_name : infer_model->get_output_names()) { size_t output_frame_size = infer_model->output(output_name)->get_frame_size(); auto output_buffer = page_aligned_alloc(output_frame_size); @@ -111,6 +133,7 @@ int main() std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl; return job.status(); } + // detach() is called in order for jobs to run in parallel (and not one after the other) job->detach(); if (i == FRAMES_COUNT - 1) { @@ -124,6 +147,7 @@ int main() std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl; return status; } - + + std::cout << "Inference finished successfully" << std::endl; return HAILO_SUCCESS; } diff --git a/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt new file mode 100644 index 00000000..4ebb8599 --- /dev/null +++ b/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(HailoRT 4.17.0 EXACT REQUIRED) + +add_executable(cpp_async_infer_basic_example async_infer_basic_example.cpp) +target_link_libraries(cpp_async_infer_basic_example PRIVATE HailoRT::libhailort) + +if(WIN32) + target_compile_options(cpp_async_infer_basic_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines its own) + /wd4201 /wd4251 + ) +endif() + +set_target_properties(cpp_async_infer_basic_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp b/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp similarity index 59% rename from hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp rename to hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp index 30c744de..ec78ac77
100644 --- a/hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp +++ b/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp @@ -3,8 +3,8 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file async_infer_example.cpp - * This example demonstrates the Async Infer API usage and assumes the model has only one input and output. + * @file async_infer_basic_example.cpp + * This example demonstrates the Async Infer API usage with a specific model. **/ #include "hailo/hailort.hpp" @@ -42,6 +42,7 @@ int main() return vdevice.status(); } + // Create infer model from HEF file. auto infer_model_exp = vdevice.value()->create_infer_model(HEF_FILE); if (!infer_model_exp) { std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl; @@ -49,45 +50,60 @@ } auto infer_model = infer_model_exp.release(); + // Configure the infer model auto configured_infer_model = infer_model->configure(); if (!configured_infer_model) { std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl; return configured_infer_model.status(); } + // The buffers are stored here as a guard for the memory. The buffer will be freed only after + // configured_infer_model is released. + std::vector<std::shared_ptr<uint8_t>> buffer_guards; + auto bindings = configured_infer_model->create_bindings(); if (!bindings) { std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl; return bindings.status(); } - size_t input_frame_size = infer_model->input()->get_frame_size(); - auto input_buffer = page_aligned_alloc(input_frame_size); - auto status = bindings->input()->set_buffer(MemoryView(input_buffer.get(), input_frame_size)); - if (HAILO_SUCCESS != status) { - std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; - return status; + for (const auto &input_name : infer_model->get_input_names()) { + size_t input_frame_size = infer_model->input(input_name)->get_frame_size(); + auto input_buffer = page_aligned_alloc(input_frame_size); + auto status = bindings->input(input_name)->set_buffer(MemoryView(input_buffer.get(), input_frame_size)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; + return status; + } + + buffer_guards.push_back(input_buffer); } - size_t output_frame_size = infer_model->output()->get_frame_size(); - auto output_buffer = page_aligned_alloc(output_frame_size); - status = bindings->output()->set_buffer(MemoryView(output_buffer.get(), output_frame_size)); - if (HAILO_SUCCESS != status) { - std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; - return status; + for (const auto &output_name : infer_model->get_output_names()) { + size_t output_frame_size = infer_model->output(output_name)->get_frame_size(); + auto output_buffer = page_aligned_alloc(output_frame_size); + auto status = bindings->output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set infer output buffer, status = " << status << std::endl; + return status; + } + + buffer_guards.push_back(output_buffer); } + // Run the async infer job.
auto job = configured_infer_model->run_async(bindings.value()); if (!job) { std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl; return job.status(); } - status = job->wait(std::chrono::milliseconds(1000)); + auto status = job->wait(std::chrono::milliseconds(1000)); if (HAILO_SUCCESS != status) { std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl; return status; } - + + std::cout << "Inference finished successfully" << std::endl; return HAILO_SUCCESS; } diff --git a/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt deleted file mode 100644 index 650edb46..00000000 --- a/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) - -find_package(HailoRT 4.16.2 EXACT REQUIRED) - -add_executable(cpp_async_infer_example async_infer_example.cpp) -target_link_libraries(cpp_async_infer_example PRIVATE HailoRT::libhailort) - -if(WIN32) - target_compile_options(cpp_async_infer_example PRIVATE - /DWIN32_LEAN_AND_MEAN - /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) - /wd4201 /wd4251 - ) -endif() - -set_target_properties(cpp_async_infer_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt deleted file mode 100644 index 06d4e340..00000000 --- a/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) - -find_package(HailoRT 4.16.2 EXACT REQUIRED) - -add_executable(cpp_async_infer_functionality_example async_infer_functionality_example.cpp) -target_link_libraries(cpp_async_infer_functionality_example PRIVATE HailoRT::libhailort) - -if(WIN32) - target_compile_options(cpp_async_infer_functionality_example PRIVATE - /DWIN32_LEAN_AND_MEAN - /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) - /wd4201 /wd4251 - ) -endif() - -set_target_properties(cpp_async_infer_functionality_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt index eccb90be..9a55ec16 100644 --- a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp) target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt index 6913af9d..c5d1a80f 100644 --- a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) 
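Both async infer examples queue several frames with the same detach-then-wait pattern: every job is detached so it runs in parallel, and only the last job is awaited, which is sufficient because jobs complete in submission order. A condensed sketch of that pattern follows (a hedged fragment assuming `using namespace hailort;` plus `configured_infer_model`, `bindings` and `FRAMES_COUNT` set up as in the examples above; the `AsyncInferJob` type name follows the value returned by `run_async`):

```
// Sketch of the detach/wait job lifecycle used by the async infer examples.
AsyncInferJob last_job;
for (uint32_t i = 0; i < FRAMES_COUNT; i++) {
    auto job = configured_infer_model->run_async(bindings);
    if (!job) {
        return job.status();
    }
    // Detach so the job keeps running in parallel instead of being waited on
    // when the local handle goes out of scope.
    job->detach();
    if (i == FRAMES_COUNT - 1) {
        last_job = job.release();
    }
}
// Jobs complete in order, so waiting on the last one implies all are done.
auto status = last_job.wait(std::chrono::milliseconds(1000));
if (HAILO_SUCCESS != status) {
    return status;
}
```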
add_executable(cpp_multi_device_example multi_device_example.cpp) target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt index 6f2ccbcf..a3b00bc7 100644 --- a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_multi_network_vstream_example multi_network_vstream_example.cpp) target_link_libraries(cpp_multi_network_vstream_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt index 433c6c4c..dac78781 100644 --- a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_multi_process_example multi_process_example.cpp) target_link_libraries(cpp_multi_process_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.ps1 b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.ps1 index 884f7915..158f7741 100644 --- a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.ps1 +++ b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.ps1 @@ -5,7 +5,7 @@ Param( ) $max_processes_count = 8 -$first_hef="hefs\multi_network_shortcut_net.hef" +$first_hef="hefs\shortcut_net_nv12.hef" $second_hef="hefs\shortcut_net.hef" $executable_base_name="cpp_multi_process_example" $executable_name="$executable_base_name.exe" diff --git a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.sh b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.sh index 7b7e6fd9..38933711 100755 --- a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.sh +++ b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.sh @@ -1,6 +1,6 @@ #!/bin/bash -readonly first_hef="hefs/multi_network_shortcut_net.hef" +readonly first_hef="hefs/shortcut_net_nv12.hef" readonly second_hef="hefs/shortcut_net.hef" readonly max_processes_count=8 readonly default_processes_count=1 diff --git a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt index 8929fafb..b95cda57 100644 --- a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_notification_callback_example notification_callback_example.cpp) target_link_libraries(cpp_notification_callback_example 
PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt index 83f4dd5c..837e7c56 100644 --- a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_power_measurement_example power_measurement_example.cpp) target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt index 1b03be65..9b30a355 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_raw_async_streams_multi_thread_example raw_async_streams_multi_thread_example.cpp) target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp index cbc99fad..c48cb153 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp @@ -71,8 +71,8 @@ Expected> configure_network_group(Device static void output_async_callback(const OutputStream::CompletionInfo &completion_info) { // Real applications can free the buffer or forward it to post-process/display. - if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORTED_BY_USER != completion_info.status)) { - // We will get HAILO_STREAM_ABORTED_BY_USER when activated_network_group is destructed. + if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORT != completion_info.status)) { + // We will get HAILO_STREAM_ABORT when activated_network_group is destructed. std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl; } } @@ -80,13 +80,13 @@ static void output_async_callback(const OutputStream::CompletionInfo &completion static void input_async_callback(const InputStream::CompletionInfo &completion_info) { // Real applications can free the buffer or reuse it for next transfer. - if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORTED_BY_USER != completion_info.status)) { - // We will get HAILO_STREAM_ABORTED_BY_USER when activated_network_group is destructed. + if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORT != completion_info.status)) { + // We will get HAILO_STREAM_ABORT when activated_network_group is destructed. std::cerr << "Got an unexpected status on callback. 
status=" << completion_info.status << std::endl; } } -static hailo_status infer(ConfiguredNetworkGroup &network_group) +static hailo_status infer(Device &device, ConfiguredNetworkGroup &network_group) { // Assume one input and output auto &output = network_group.get_output_streams()[0].get(); @@ -101,6 +101,16 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) auto output_buffer = page_aligned_alloc(output.get_frame_size()); auto input_buffer = page_aligned_alloc(input.get_frame_size()); + // If the same buffer is used multiple times for async I/O, it is recommended to pre-map it + // into the device to improve performance. The DmaMappedBuffer object manages the mapping, and it'll be unmapped when it is destroyed. + // Notice that the buffer must be alive as long as the mapping is alive, so we define the mapping after the buffers. + auto output_mapping = DmaMappedBuffer::create(device, output_buffer.get(), output.get_frame_size(), HAILO_DMA_BUFFER_DIRECTION_D2H); + auto input_mapping = DmaMappedBuffer::create(device, input_buffer.get(), input.get_frame_size(), HAILO_DMA_BUFFER_DIRECTION_H2D); + if (!output_mapping || !input_mapping) { + std::cerr << "Failed to map buffer with status=" << input_mapping.status() << ", " << output_mapping.status() << std::endl; + return HAILO_INTERNAL_FAILURE; + } + std::atomic<hailo_status> output_status(HAILO_UNINITIALIZED); std::thread output_thread([&]() { while (true) { @@ -127,16 +137,16 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) std::this_thread::sleep_for(std::chrono::seconds(5)); // Calling shutdown on a network group will ensure that all async operations are done. All pending - // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. + // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORT. // Only after the shutdown is called, we can safely free the buffers and any variable captured inside the async // callback lambda. network_group.shutdown(); - // Thread should be stopped with HAILO_STREAM_ABORTED_BY_USER status. + // Thread should be stopped with HAILO_STREAM_ABORT status.
output_thread.join(); input_thread.join(); - if ((HAILO_STREAM_ABORTED_BY_USER != output_status) || (HAILO_STREAM_ABORTED_BY_USER != input_status)) { + if ((HAILO_STREAM_ABORT != output_status) || (HAILO_STREAM_ABORT != input_status)) { std::cerr << "Got unexpected statuses from thread: " << output_status << ", " << input_status << std::endl; return HAILO_INTERNAL_FAILURE; } @@ -165,7 +175,7 @@ int main() return EXIT_FAILURE; } - auto status = infer(*network_group.value()); + auto status = infer(*device.value(), *network_group.value()); if (HAILO_SUCCESS != status) { return EXIT_FAILURE; } diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt index cf7e24d5..c132b53c 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp) target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp index e402a8dd..d86f9f72 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp @@ -61,11 +61,11 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) case HAILO_SUCCESS: // Real applications can forward the buffer to post-process/display. Here we just re-launch new async read. status = output.read_async(completion_info.buffer_addr, completion_info.buffer_size, read_done); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORT != status)) { std::cerr << "Failed read async with status=" << status << std::endl; } break; - case HAILO_STREAM_ABORTED_BY_USER: + case HAILO_STREAM_ABORT: // Transfer was canceled, finish gracefully. break; default: @@ -80,11 +80,11 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) // Real applications may free the buffer and replace it with a new buffer ready to be sent. Here we just // re-launch new async write. status = input.write_async(completion_info.buffer_addr, completion_info.buffer_size, write_done); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORT != status)) { std::cerr << "Failed write async with status=" << status << std::endl; } break; - case HAILO_STREAM_ABORTED_BY_USER: + case HAILO_STREAM_ABORT: // Transfer was canceled, finish gracefully. break; default: @@ -121,7 +121,7 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) std::this_thread::sleep_for(std::chrono::seconds(5)); // Calling shutdown on a network group will ensure that all async operations are done.
All pending - // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. + // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORT. // Only after the shutdown is called, we can safely free the buffers and any variable captured inside the async // callback lambda. network_group.shutdown(); diff --git a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt index 709110a7..6cf42fd0 100644 --- a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_raw_streams_example raw_streams_example.cpp) target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt index 1b3a6895..115feac1 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_example switch_network_groups_example.cpp) target_link_libraries(cpp_switch_network_groups_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt index 8b5c1f22..9f4ac2bc 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_manually_example switch_network_groups_manually_example.cpp) target_link_libraries(cpp_switch_network_groups_manually_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt index f0a8ad08..fddc3b99 100644 --- a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_vstreams_example vstreams_example.cpp) target_link_libraries(cpp_vstreams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/hef.proto b/hailort/libhailort/hef.proto index 52504937..59625f86 100644 --- a/hailort/libhailort/hef.proto +++ b/hailort/libhailort/hef.proto @@ -51,6 +51,8 @@ enum 
ProtoHEFExtensionType { OUTPUT_SCALE_PER_FEATURE = 25; PERIPH_CALCULATION_IN_HAILORT = 26; HAILO_NET_FLOW_YOLOV8_NMS = 27; + BATCH_REGISTER_CONFIG = 28; + HAILO_NET_FLOW_BBOX_DECODING = 29; UNUSED = 0XFFFF; } @@ -78,7 +80,7 @@ message ProtoHEFHeader { uint64 version = 4; } -// Enum describing the different possible hw_archs +// Enum describing the different possible hw_archs. Must be aligned to device_internal::HEFHwArch enum ProtoHEFHwArch { PROTO__HW_ARCH__HAILO8 = 0; PROTO__HW_ARCH__HAILO8P = 1; @@ -291,6 +293,9 @@ message ProtoHEFNmsOp { // Index of background class for background removal uint32 background_removal_index = 6; + // Whether the op contains bbox decoding only + bool bbox_decoding_only = 13; + // Additional information needed for specific NMS types oneof nms_op { ProtoHEFYoloNmsOp yolo_nms_op = 7; // YOLOv5 post process @@ -574,6 +579,7 @@ message ProtoHEFAction { ProtoHEFActionDebugSleep debug_sleep = 12; ProtoHEFActionEnableNMS enable_nms = 13; ProtoHEFActionWriteDataByType write_data_by_type = 14; + ProtoHEFActionSwitchLcuBatch switch_lcu_batch = 15; } } @@ -690,7 +696,7 @@ message ProtoHEFActionEnableLcu { // Address at lcu to mark as complete upon reach (after lcu_kernel_done_count times) uint32 lcu_kernel_done_address = 3; - // Amount of times lcu_kernel_done_address should be reached before marking is done + // Amount of times lcu_kernel_done_address should be visited before done uint32 lcu_kernel_done_count = 4; // Address to indicate where the FW should write to @@ -701,6 +707,18 @@ message ProtoHEFActionEnableLcu { uint32 network_index = 6; } +message ProtoHEFActionSwitchLcuBatch { + // Index of the lcu + uint32 lcu_index = 1; + + // Index of the cluster of the lcu + uint32 cluster_index = 2; + + // network index - name given by networks_names + // in ProtoHEFNetworkGroup + uint32 network_index = 6; +} + message ProtoHEFActionEnableNMS { // Index of the nms unit uint32 nms_unit_index = 1; diff --git a/hailort/libhailort/include/hailo/buffer.hpp b/hailort/libhailort/include/hailo/buffer.hpp index 1b647f95..08c6b136 100644 --- a/hailort/libhailort/include/hailo/buffer.hpp +++ b/hailort/libhailort/include/hailo/buffer.hpp @@ -11,7 +11,6 @@ #define _HAILO_BUFFER_HPP_ #include "hailo/expected.hpp" -#include "hailo/buffer_storage.hpp" #include #include @@ -24,9 +23,25 @@ namespace hailort { +class BufferStorage; +using BufferStoragePtr = std::shared_ptr<BufferStorage>; + class Buffer; using BufferPtr = std::shared_ptr<Buffer>; + +/*! Buffer storage parameters.
Analogous to hailo_buffer_parameters_t */ +struct HAILORTAPI BufferStorageParams +{ +public: + + static BufferStorageParams create_dma(); + // Defaults to heap params + BufferStorageParams(); + + hailo_buffer_flags_t flags; +}; + class HAILORTAPI Buffer final { public: @@ -50,9 +65,7 @@ class HAILORTAPI Buffer final // Empty buffer (points to null, size is zero) Buffer(); - // Buffer backed by the storage param - Buffer(BufferStoragePtr storage); - ~Buffer() = default; + ~Buffer(); Buffer(const Buffer& other) = delete; Buffer& operator=(const Buffer& other) = delete; @@ -156,9 +169,20 @@ class HAILORTAPI Buffer final uint32_t& as_uint32(); uint64_t& as_uint64(); + // Returns the pointer managed by this object and releases ownership + Expected<void *> release() noexcept; + + // Internal functions + static Expected<Buffer> create(BufferStoragePtr storage, bool register_storage = true); + private: + class StorageImpl; + + // Buffer backed by the storage param + Buffer(std::unique_ptr<StorageImpl> storage); + + // Initialization dependency - BufferStoragePtr m_storage; + std::unique_ptr<StorageImpl> m_storage_impl; uint8_t *m_data; size_t m_size; }; diff --git a/hailort/libhailort/include/hailo/buffer_storage.hpp b/hailort/libhailort/include/hailo/buffer_storage.hpp deleted file mode 100644 index ce227e48..00000000 --- a/hailort/libhailort/include/hailo/buffer_storage.hpp +++ /dev/null @@ -1,285 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file buffer_storage.hpp - * @brief TODO: fill me (HRT-10026) - **/ - -#ifndef _HAILO_BUFFER_STORAGE_HPP_ -#define _HAILO_BUFFER_STORAGE_HPP_ - -#include "hailo/hailort.h" -#include "hailo/expected.hpp" - -#include -#include -#include -#include -#include -#include - - -/** hailort namespace */ -namespace hailort -{ - -// Forward declarations -class Device; -class VDevice; -class VdmaDevice; -class BufferStorage; -class HeapStorage; -class DmaStorage; -class UserBufferStorage; -class HailoRTDriver; -class Buffer; - -namespace vdma { - class DmaAbleBuffer; - using DmaAbleBufferPtr = std::shared_ptr<DmaAbleBuffer>; - - class MappedBuffer; - using MappedBufferPtr = std::shared_ptr<MappedBuffer>; -} - - -/*! Buffer storage parameters.
Analogical to hailo_buffer_parameters_t */ -struct HAILORTAPI BufferStorageParams -{ -public: - struct HeapParams - { - public: - HeapParams(); - }; - - struct DmaMappingParams - { - public: - static Expected create(const hailo_buffer_dma_mapping_params_t ¶ms); - // DmaMappingParams for a buffer to be mapped to device - DmaMappingParams(Device &device, hailo_dma_buffer_direction_t data_direction); - // DmaMappingParams for a buffer to be mapped to all the underlying devices held by vdevice - DmaMappingParams(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction); - // DmaMappingParams for a buffer to be lazily mapped upon it's first async transfer to a given device - DmaMappingParams(); - - // Note: We hold a pointer to a Device/VDevice/neither, since DmaMappingParams support mapping to - // a device, vdevice or lazy mapping - Device *device; - VDevice *vdevice; - hailo_dma_buffer_direction_t data_direction; - - private: - DmaMappingParams(const hailo_buffer_dma_mapping_params_t ¶ms); - }; - - static Expected create(const hailo_buffer_parameters_t ¶ms); - // Dma buffer params for lazy mapping - static BufferStorageParams create_dma(); - // Dma buffer params for mapping to device in data_direction - static BufferStorageParams create_dma(Device &device, hailo_dma_buffer_direction_t data_direction); - // Dma buffer params for mapping to vdevice in data_direction - static BufferStorageParams create_dma(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction); - - // Defaults to heap params - BufferStorageParams(); - - hailo_buffer_flags_t flags; - union { - HeapParams heap_params; - DmaMappingParams dma_mapping_params; - }; -}; - -using BufferStoragePtr = std::shared_ptr; - -class HAILORTAPI BufferStorage -{ -public: - enum class Type { - HEAP, - DMA, - USER_BUFFER - }; - - static Expected create(size_t size, const BufferStorageParams ¶ms); - - BufferStorage(BufferStorage&& other) noexcept = default; - BufferStorage(const BufferStorage &) = delete; - BufferStorage &operator=(BufferStorage &&) = delete; - BufferStorage &operator=(const BufferStorage &) = delete; - virtual ~BufferStorage() = default; - - Type type() const; - virtual size_t size() const = 0; - virtual void *user_address() = 0; - // Returns the pointer managed by this object and releases ownership - // TODO: Add a free function pointer? (HRT-10024) - // // Free the returned pointer with `delete` - // TODO: after release the containing buffer will hold pointers to values that were released. - // Document that this can happen? Disable this behavior somehow? (HRT-10024) - virtual Expected release() noexcept = 0; - // Maps the storage to device in data_direction. - // - If the mapping is new - true is returned. - // - If the mapping already exists - false is returned. - // - Otherwise - Unexpected with a failure status is returned. - // Note: This buffer storage must be destroyed before the device it is mapped to is destroyed! 
- // Failing to do so will lead to unexpected results - // TODO: resolve this issue (HRT-12361) - virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) = 0; - virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) = 0; - - // Internal functions - virtual Expected get_dma_mapped_buffer(const std::string &device_id) = 0; - -protected: - explicit BufferStorage(Type type); - - const Type m_type; -}; - -using HeapStoragePtr = std::shared_ptr; - -class HAILORTAPI HeapStorage : public BufferStorage -{ -public: - static Expected create(size_t size); - HeapStorage(std::unique_ptr data, size_t size); - HeapStorage(HeapStorage&& other) noexcept; - HeapStorage(const HeapStorage &) = delete; - HeapStorage &operator=(HeapStorage &&) = delete; - HeapStorage &operator=(const HeapStorage &) = delete; - virtual ~HeapStorage() = default; - - virtual size_t size() const override; - virtual void *user_address() override; - virtual Expected release() noexcept override; - virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; - virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) override; - - // Internal functions - virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; - -private: - std::unique_ptr m_data; - size_t m_size; -}; - -// ************************************* NOTE - START ************************************* // -// DmaStorage isn't currently supported and is for internal use only // -// **************************************************************************************** // -using DmaStoragePtr = std::shared_ptr; - -// TODO: HRT-10026 doc this -class HAILORTAPI DmaStorage : public BufferStorage -{ -public: - // Creates a DmaStorage instance holding a dma-able buffer size bytes large. - // The buffer isn't mapped to dma until dma_map is called. - static Expected create(size_t size); - // Creates a DmaStorage instance holding a dma-able buffer size bytes large. - // The buffer is mapped to device in data_direction. - static Expected create(size_t size, - hailo_dma_buffer_direction_t data_direction, Device &device); - // Creates a DmaStorage instance holding a dma-able buffer size bytes large. - // The buffer is mapped to vdevice.get_physical_devices() in data_direction. - static Expected create(size_t size, - hailo_dma_buffer_direction_t data_direction, VDevice &vdevice); - - // TODO: doc that the addr needs to be on a new page and aligned to 64B (HRT-9559) - // probably best just to call mmap - // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. - // The buffer isn't mapped to dma until dma_map is called. - static Expected create_from_user_address(void *user_address, size_t size); - // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. - // The buffer is mapped to device in data_direction. - static Expected create_from_user_address(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, Device &device); - // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. - // The buffer is mapped to vdevice.get_physical_devices() in data_direction. 
- static Expected create_from_user_address(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, VDevice &device); - // Creates a DMA-able buffer from given user buffer at address given of size length if possible, - // Otherwise allocates new one length of size - static Expected> create_dma_able_buffer_from_user_size(void *addr, size_t size); - - DmaStorage(const DmaStorage &other) = delete; - DmaStorage &operator=(const DmaStorage &other) = delete; - DmaStorage(DmaStorage &&other) noexcept = default; - DmaStorage &operator=(DmaStorage &&other) = delete; - virtual ~DmaStorage(); - - virtual size_t size() const override; - virtual void *user_address() override; - virtual Expected release() noexcept override; - // TODO: thread safety (HRT-10669) - virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; - virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) override; - - // Internal functions - DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer); - virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; - -private: - // - Creates a backing DmaAbleBuffer: - // - If user_address is null, it'll be allocated by hailort - // - Otherwise, it'll be a non owning wrapper of the user's buffer - // - The said buffer is mapped physical_devices in data_direction. - // - By default (if physical_devices is empty), no mapping will occur - static Expected create(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction = HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM, - std::vector> &&physical_devices = {}); - - // Initialization dependency - vdma::DmaAbleBufferPtr m_dma_able_buffer; - // For each device (key is device_id), we store some vdma mapping. - // TODO: use (device_id, direction) as key or have two dicts (HRT-10656) - using UnmappingCallback = std::function; - std::unordered_map> m_mappings; -}; - - -using UserBufferStoragePtr = std::shared_ptr; -class HAILORTAPI UserBufferStorage : public BufferStorage -{ -public: - static Expected create(void *user_address, const size_t size); - - UserBufferStorage(void *user_address, const size_t size); - UserBufferStorage(const UserBufferStorage &other) = delete; - UserBufferStorage &operator=(const UserBufferStorage &other) = delete; - UserBufferStorage(UserBufferStorage &&other) noexcept = default; - UserBufferStorage &operator=(UserBufferStorage &&other) = delete; - virtual ~UserBufferStorage() = default; - - virtual size_t size() const override; - virtual void *user_address() override; - virtual Expected release() noexcept override; - virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; - virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) override; - - // Internal functions - virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; - - // Craete storage for user buffer to store mappings. Used internally not by the user. 
-    static Expected> create_storage_from_user_buffer(void *addr, size_t size);
-
-private:
-
-    void * m_user_address;
-    const size_t m_size;
-
-    using UnmappingCallback = std::function;
-    std::unordered_map> m_mappings;
-};
-
-// ************************************** NOTE - END ************************************** //
-// DmaStorage isn't currently supported and is for internal use only                        //
-// **************************************************************************************** //
-
-} /* namespace hailort */
-
-#endif /* _HAILO_BUFFER_STORAGE_HPP_ */
diff --git a/hailort/libhailort/include/hailo/device.hpp b/hailort/libhailort/include/hailo/device.hpp
index 11097919..88f0959f 100644
--- a/hailort/libhailort/include/hailo/device.hpp
+++ b/hailort/libhailort/include/hailo/device.hpp
@@ -34,14 +34,6 @@ namespace hailort
 class Device;
 using NotificationCallback = std::function;
-namespace vdma {
-    class DmaAbleBuffer;
-    using DmaAbleBufferPtr = std::shared_ptr;
-
-    class MappedBuffer;
-    using MappedBufferPtr = std::shared_ptr;
-}
-
 /** @} */ // end of group_type_definitions
 /*! Represents the Hailo device (chip). */
@@ -700,42 +692,44 @@ class HAILORTAPI Device
     */
    virtual bool is_stream_interface_supported(const hailo_stream_interface_t &stream_interface) const = 0;
-    // TODO: Also link to async infer - ConfiguredInferModel, Bindings etc. Just like we did for
-    // InputStream::write_async and OutputStream::read_async (HRT-11039)
    /**
     * Maps the buffer pointed to by @a address for DMA transfers to/from this device, in the specified
-     * @a direction.
-     * DMA mapping of buffers in advance may improve the performance of `InputStream::write_async()` or
-     * `OutputStream::read_async()`. This improvement will be realized if the buffer is reused multiple times
-     * across different async operations.
-     * - For buffers that will be written to the device via `InputStream::write_async()`, use `HAILO_H2D_STREAM`
-     *   for the @a direction parameter.
-     * - For buffers that will be read from the device via `OutputStream::read_async()`, use `HAILO_D2H_STREAM`
-     *   for the @a direction parameter.
-     *
-     * @param[in] address       The address of the buffer to be mapped
-     * @param[in] size          The buffer's size in bytes
-     * @param[in] direction     The direction of the mapping
+     * @a direction.
+     * DMA mapping of buffers in advance may improve the performance of the async API. This improvement will become
+     * apparent when the buffer is reused multiple times across different async operations.
+     *
+     * For high level API (aka InferModel), buffers bound using ConfiguredInferModel::Bindings::InferStream::set_buffer
+     * can be mapped.
+     *
+     * For low level API (aka InputStream/OutputStream), buffers passed to InputStream::write_async and
+     * OutputStream::read_async can be mapped.
+     *
+     * @param[in] address       The address of the buffer to be mapped.
+     * @param[in] size          The buffer's size in bytes.
+     * @param[in] direction     The direction of the mapping. For input streams, use `HAILO_DMA_BUFFER_DIRECTION_H2D`
+     *                          and for output streams, use `HAILO_DMA_BUFFER_DIRECTION_D2H`.
+     *
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
-     * @note The DMA mapping will be freed upon calling dma_unmap() with @a address and @a direction, or when the
-     *       @a Device object is destroyed.
-     * @note The buffer pointed to by @a address cannot be freed until it is unmapped (via dma_unmap() or @a Device
+     *
+     * @note The DMA mapping will be released upon calling dma_unmap() with @a address, @a size and @a direction, or
+     *       when the @a Device object is destroyed.
+     * @note The buffer pointed to by @a address cannot be released until it is unmapped (via dma_unmap() or @a Device
     *       destruction).
     */
-    virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction);
+    virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction);
    /**
     * Un-maps a buffer buffer pointed to by @a address for DMA transfers to/from this device, in the direction
     * @a direction.
     *
-     * @param[in] address       The address of the buffer to be un-mapped
-     * @param[in] direction     The direction of the mapping
+     * @param[in] address       The address of the buffer to be un-mapped.
+     * @param[in] size          The buffer's size in bytes.
+     * @param[in] direction     The direction of the mapping.
+     *
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
     */
-    virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction);
+    virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction);
-    virtual Expected> try_dma_map(vdma::DmaAbleBufferPtr buffer,
-        hailo_stream_direction_t direction);
    virtual hailo_status direct_write_memory(uint32_t address, const void *buffer, uint32_t size);
    virtual hailo_status direct_read_memory(uint32_t address, void *buffer, uint32_t size);
    hailo_status set_overcurrent_state(bool should_activate);
@@ -745,12 +739,12 @@ class HAILORTAPI Device
    // The sum of the number of contexts will fit in uint8_t
    Expected> get_number_of_dynamic_contexts_per_network_group();
    Expected download_context_action_list(uint32_t network_group_id, uint8_t context_type,
-        uint8_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size = 10000);
+        uint16_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size = 10000);
    // The batch configured is reset between network groups
    hailo_status set_context_action_list_timestamp_batch(uint16_t batch_index);
    hailo_status set_context_switch_breakpoint(uint8_t breakpoint_id, bool break_at_any_network_group_index,
        uint8_t network_group_index, bool break_at_any_batch_index, uint16_t batch_index, bool break_at_any_context_index,
-        uint8_t context_index, bool break_at_any_action_index, uint16_t action_index);
+        uint16_t context_index, bool break_at_any_action_index, uint16_t action_index);
    hailo_status continue_context_switch_breakpoint(uint8_t breakpoint_id);
    hailo_status clear_context_switch_breakpoint(uint8_t breakpoint_id);
    Expected get_context_switch_breakpoint_status(uint8_t breakpoint_id);
diff --git a/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp b/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp
new file mode 100644
index 00000000..3eb8c69c
--- /dev/null
+++ b/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp
@@ -0,0 +1,95 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file dma_mapped_buffer.hpp
+ * @brief Object that keeps DMA mapping to some device/vdevice alive.
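To make the reworked mapping lifecycle above concrete, here is a minimal sketch of pre-mapping an input buffer around `InputStream::write_async()`. It assumes an already-open `Device` and `InputStream`, a hypothetical `fill_with_input_data()` helper, and the async completion-info callback type; it is an illustration, not part of the patch.

```cpp
#include "hailo/hailort.hpp"
#include <vector>

using namespace hailort;

// fill_with_input_data() is a hypothetical application helper, not part of HailoRT.
void fill_with_input_data(std::vector<uint8_t> &frame);

hailo_status stream_pre_mapped_frames(Device &device, InputStream &input_stream, size_t frames_count)
{
    std::vector<uint8_t> frame(input_stream.get_frame_size());

    // Map once up front; every write_async() below reuses the same mapping.
    auto status = device.dma_map(frame.data(), frame.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
    if (HAILO_SUCCESS != status) {
        return status;
    }

    for (size_t i = 0; i < frames_count; i++) {
        fill_with_input_data(frame);
        status = input_stream.write_async(frame.data(), frame.size(),
            [](const InputStream::CompletionInfo &info) {
                (void)info; // info.status is HAILO_SUCCESS or HAILO_STREAM_ABORT; keep callbacks short
            });
        if (HAILO_SUCCESS != status) {
            break;
        }
        // A real application must not touch `frame` again until the callback fires;
        // the required synchronization is omitted here for brevity.
    }

    // Unmap with the same address, size and direction that were mapped.
    return device.dma_unmap(frame.data(), frame.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
}
```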
+ **/ + +#ifndef _HAILO_DMA_MAPPED_BUFFER_HPP_ +#define _HAILO_DMA_MAPPED_BUFFER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/vdevice.hpp" +#include "hailo/device.hpp" +#include "hailo/expected.hpp" + +namespace hailort +{ + +/*! + * \class DmaMappedBuffer + * \brief A wrapper class for mapping and unmapping buffers using VDevice::dma_map and VDevice::dma_unmap (or their + * variants for Device). + * + * The DmaMappedBuffer class provides a convenient way to keep a DMA mapping on a buffer active. + * It encapsulates the functionality of mapping and unmapping buffers using VDevice::dma_map and + * VDevice::dma_unmap, as well as their variants for Device. + * + * \note The buffer pointed to by address cannot be released until this object is destroyed. + * + * Example: + * \code{.cpp} + * // Create a DmaMappedBuffer object for a VDevice + * void* user_address = ...; + * size_t size = ...; + * hailo_dma_buffer_direction_t direction = ...; + * Expected mapped_buffer = DmaMappedBuffer::create(vdevice, user_address, size, direction); + * if (!mapped_buffer.has_value()) { + * // Handle error + * } else { + * // Use the mapped buffer + * } + * \endcode + */ +class HAILORTAPI DmaMappedBuffer final { +public: + /** + * Creates a DmaMappedBuffer object for a VDevice. + * + * @param vdevice The VDevice object to use for mapping the buffer. + * @param user_address The user address of the buffer to be mapped. + * @param size The size of the buffer to be mapped. + * @param direction The direction of the DMA transfer. + * + * @return An Expected object containing the created DmaMappedBuffer on success, or an error on failure. + */ + static Expected create(VDevice &vdevice, void *user_address, size_t size, + hailo_dma_buffer_direction_t direction); + + /** + * Creates a DmaMappedBuffer object for a Device. + * + * @param device The Device object to use for mapping the buffer. + * @param user_address The user address of the buffer to be mapped. + * @param size The size of the buffer to be mapped. + * @param direction The direction of the DMA transfer. + * + * @return An Expected object containing the created DmaMappedBuffer on success, or an error on failure. + */ + static Expected create(Device &device, void *user_address, size_t size, + hailo_dma_buffer_direction_t direction); + + /** + * The destructor automatically unmaps the buffer. 
+ */ + ~DmaMappedBuffer(); + + DmaMappedBuffer(const DmaMappedBuffer &) = delete; + DmaMappedBuffer &operator=(const DmaMappedBuffer &) = delete; + + + DmaMappedBuffer(DmaMappedBuffer &&other); + DmaMappedBuffer &operator=(DmaMappedBuffer &&other); + +private: + class Impl; + explicit DmaMappedBuffer(std::unique_ptr impl); + + std::unique_ptr m_impl; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_DMA_MAPPED_BUFFER_HPP_ */ diff --git a/hailort/libhailort/include/hailo/event.hpp b/hailort/libhailort/include/hailo/event.hpp index 1dc82611..c013ea78 100644 --- a/hailort/libhailort/include/hailo/event.hpp +++ b/hailort/libhailort/include/hailo/event.hpp @@ -129,7 +129,7 @@ class HAILORTAPI Semaphore : public Waitable using Waitable::Waitable; static Expected create(uint32_t initial_count); - static SemaphorePtr create_shared(uint32_t initial_count); + static Expected create_shared(uint32_t initial_count); virtual hailo_status signal() override; virtual bool is_auto_reset() override; diff --git a/hailort/libhailort/include/hailo/expected.hpp b/hailort/libhailort/include/hailo/expected.hpp index d911539d..13dcaff2 100644 --- a/hailort/libhailort/include/hailo/expected.hpp +++ b/hailort/libhailort/include/hailo/expected.hpp @@ -186,6 +186,8 @@ class Unexpected final m_status(status) {} + operator hailo_status() { return m_status; } + hailo_status m_status; }; @@ -217,6 +219,17 @@ class Expected final template friend class Expected; + /** + * Construct a new Expected from an Unexpected status. + * + * NOTE: Asserting that status is not HAILO_SUCCESS if NDEBUG is not defined. + */ + Expected(Unexpected unexpected) : + m_status(unexpected.m_status) + { + assert(unexpected.m_status != HAILO_SUCCESS); + } + /** * Default constructor * @@ -335,17 +348,6 @@ class Expected final m_status(HAILO_SUCCESS) {} - /** - * Construct a new Expected from an Unexpected status. - * - * NOTE: Asserting that status is not HAILO_SUCCESS if NDEBUG is not defined. 
- */ - Expected(const Unexpected &unexpected) : - m_status(unexpected.m_status) - { - assert(unexpected.m_status != HAILO_SUCCESS); - } - Expected& operator=(const Expected &other) = delete; Expected& operator=(Expected &&other) noexcept = delete; Expected& operator=(const T &other) = delete; diff --git a/hailort/libhailort/include/hailo/hailort.h b/hailort/libhailort/include/hailo/hailort.h index d218b229..66ff9451 100644 --- a/hailort/libhailort/include/hailo/hailort.h +++ b/hailort/libhailort/include/hailo/hailort.h @@ -76,6 +76,8 @@ extern "C" { #define HAILO_SCHEDULER_PRIORITY_MIN (0) #define MAX_NUMBER_OF_PLANES (4) +#define NUMBER_OF_PLANES_NV12_NV21 (2) +#define NUMBER_OF_PLANES_I420 (3) typedef float float32_t; typedef double float64_t; @@ -145,8 +147,8 @@ typedef uint16_t nms_bbox_counter_t; HAILO_STATUS__X(59, HAILO_THREAD_NOT_ACTIVATED /*!< The given thread has not been activated */)\ HAILO_STATUS__X(60, HAILO_THREAD_NOT_JOINABLE /*!< The given thread is not joinable */)\ HAILO_STATUS__X(61, HAILO_NOT_FOUND /*!< Could not find element */)\ - HAILO_STATUS__X(62, HAILO_STREAM_ABORTED_BY_HW /*!< Stream aborted due to an external event */)\ - HAILO_STATUS__X(63, HAILO_STREAM_ABORTED_BY_USER /*!< Stream recv/send was aborted */)\ + HAILO_STATUS__X(62, HAILO_RESERVED_STATUS /*!< Reserved for future use */)\ + HAILO_STATUS__X(63, HAILO_STREAM_ABORT /*!< Stream recv/send was aborted */)\ HAILO_STATUS__X(64, HAILO_PCIE_DRIVER_NOT_INSTALLED /*!< Pcie driver is not installed */)\ HAILO_STATUS__X(65, HAILO_NOT_AVAILABLE /*!< Component is not available */)\ HAILO_STATUS__X(66, HAILO_TRAFFIC_CONTROL_FAILURE /*!< Traffic control failure */)\ @@ -167,6 +169,7 @@ typedef uint16_t nms_bbox_counter_t; HAILO_STATUS__X(81, HAILO_OUT_OF_HOST_CMA_MEMORY /*!< Cannot allocate more CMA memory at host */)\ HAILO_STATUS__X(82, HAILO_QUEUE_IS_FULL /*!< Cannot push more items into the queue */)\ HAILO_STATUS__X(83, HAILO_DMA_MAPPING_ALREADY_EXISTS /*!< DMA mapping already exists */)\ + HAILO_STATUS__X(84, HAILO_CANT_MEET_BUFFER_REQUIREMENTS /*!< can't meet buffer requirements */)\ typedef enum { #define HAILO_STATUS__X(value, name) name = value, @@ -180,8 +183,7 @@ typedef enum { HAILO_STATUS_MAX_ENUM = HAILO_MAX_ENUM } hailo_status; -#define HAILO_STREAM_ABORTED HAILO_STREAM_ABORTED_BY_HW /* 'HAILO_STREAM_ABORTED' is deprecated. One should use 'HAILO_STREAM_ABORTED_BY_HW' */ -#define HAILO_STREAM_INTERNAL_ABORT HAILO_STREAM_ABORTED_BY_USER /* 'HAILO_STREAM_INTERNAL_ABORT' is deprecated. One should use 'HAILO_STREAM_ABORTED_BY_USER' */ +#define HAILO_STREAM_ABORTED_BY_USER HAILO_STREAM_ABORT /* 'HAILO_STREAM_ABORTED_BY_USER' is deprecated. 
One should use 'HAILO_STREAM_ABORT' */
 /** HailoRT library version */
 typedef struct {
@@ -632,8 +634,8 @@ typedef enum {
 * For each class (::hailo_nms_shape_t.number_of_classes), the layout is
 *      \code
 *      struct (packed) {
- *          uint16_t/float32_t bbox_count;
- *          hailo_bbox_t/hailo_bbox_float32_t bbox[bbox_count];
+ *          float32_t bbox_count;
+ *          hailo_bbox_float32_t bbox[bbox_count];
 *      };
 *      \endcode
 *
@@ -815,20 +817,24 @@ typedef enum {
    HAILO_STREAM_FLAGS_MAX_ENUM = HAILO_MAX_ENUM
 } hailo_stream_flags_t;
-// ************************************* NOTE - START ************************************* //
-// Dma buffer allocation isn't currently supported and is for internal use only             //
-// **************************************************************************************** //
-// TODO: remove hailo_dma_buffer_direction_t (HRT-12391)
 /** Hailo dma buffer direction */
 typedef enum {
+    /** Buffers sent from the host (H) to the device (D). Used for input streams */
    HAILO_DMA_BUFFER_DIRECTION_H2D = 0,
+
+    /** Buffers received from the device (D) to the host (H). Used for output streams */
    HAILO_DMA_BUFFER_DIRECTION_D2H = 1,
+
+    /** Buffers that can be used both to send data to the device and to receive data from it */
    HAILO_DMA_BUFFER_DIRECTION_BOTH = 2,
    /** Max enum value to maintain ABI Integrity */
    HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM = HAILO_MAX_ENUM
 } hailo_dma_buffer_direction_t;
+// ************************************* NOTE - START ************************************* //
+// Dma buffer allocation isn't currently supported and is for internal use only             //
+// **************************************************************************************** //
 /** Hailo buffer flags */
 typedef enum {
    HAILO_BUFFER_FLAGS_NONE = 0,        /*!< No flags - heap allocated buffer */
@@ -838,31 +844,9 @@ typedef enum {
    HAILO_BUFFER_FLAGS_MAX_ENUM = HAILO_MAX_ENUM
 } hailo_buffer_flags_t;
-/** Hailo buffer heap parameters */
-typedef struct {
-    EMPTY_STRUCT_PLACEHOLDER
-} hailo_buffer_heap_params_t;
-
-// Hailo buffer dma mapping parameters.
-// - If device is not NULL, the resulting buffer created by hailo_allocate_buffer will be mapped to the device.
-// - If vdevice is not NULL, the resulting buffer created by hailo_allocate_buffer will be mapped to all the
-//   underlying devices held be vdevice.
-// - If both device and vdevice are null, the resulting buffer created by hailo_allocate_buffer will be lazily
-//   mapped upon the first async transfer (i.e. when the buffer is passed to hailo_stream_read_raw_buffer_async
-//   or hailo_stream_write_raw_buffer_async).
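The `DmaMappedBuffer` wrapper introduced earlier in this patch ties such a mapping to object lifetime, using the direction enum just shown. A minimal RAII sketch, assuming an already-created `vdevice` and a host buffer reused across async input transfers:

```cpp
#include "hailo/hailort.hpp"
#include <vector>

using namespace hailort;

hailo_status with_mapped_buffer(VDevice &vdevice, std::vector<uint8_t> &buffer)
{
    // The mapping stays alive for as long as `mapped` is alive; the destructor unmaps.
    auto mapped = DmaMappedBuffer::create(vdevice, buffer.data(), buffer.size(),
        HAILO_DMA_BUFFER_DIRECTION_H2D);
    if (!mapped.has_value()) {
        return mapped.status();
    }

    // ... run async inferences that reuse `buffer` while `mapped` is alive ...

    return HAILO_SUCCESS;
    // Leaving scope destroys `mapped`, which unmaps the buffer automatically.
}
```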
-typedef struct {
-    hailo_device device;
-    hailo_vdevice vdevice;
-    hailo_dma_buffer_direction_t direction;
-} hailo_buffer_dma_mapping_params_t;
-
 /** Hailo buffer parameters */
 typedef struct {
    hailo_buffer_flags_t flags;
-    union {
-        hailo_buffer_heap_params_t heap_params;
-        hailo_buffer_dma_mapping_params_t dma_mapping_params;
-    };
 } hailo_buffer_parameters_t;
 // ************************************** NOTE - END ************************************** //
 // Dma buffer allocation isn't currently supported and is for internal use only             //
 // **************************************************************************************** //
@@ -1229,12 +1213,23 @@ typedef struct {
    uint32_t features;
 } hailo_3d_image_shape_t;
+typedef enum
+{
+    HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR,
+    HAILO_PIX_BUFFER_MEMORY_TYPE_DMABUF,
+} hailo_pix_buffer_memory_type_t;
+
 /** image buffer plane */
 typedef struct {
    /** actual data */
    uint32_t bytes_used;
    uint32_t plane_size;
-    void *user_ptr;
+    /* Union holding either a user buffer pointer or a DMA buffer fd */
+    union
+    {
+        void *user_ptr;
+        int fd;
+    };
 } hailo_pix_buffer_plane_t;
 /** image buffer */
@@ -1242,8 +1237,15 @@ typedef struct {
    uint32_t index;
    hailo_pix_buffer_plane_t planes[MAX_NUMBER_OF_PLANES];
    uint32_t number_of_planes;
+    hailo_pix_buffer_memory_type_t memory_type;
 } hailo_pix_buffer_t;
+/** dma buffer - intended for use with Linux's dma-buf subsystem */
+typedef struct {
+    int fd;
+    size_t size;
+} hailo_dma_buffer_t;
+
 typedef struct {
    uint32_t class_group_index;
    char original_name[HAILO_MAX_STREAM_NAME_SIZE];
@@ -1290,8 +1292,11 @@ typedef struct {
    uint32_t number_of_classes;
    /** Maximum amount of bboxes per nms class */
    uint32_t max_bboxes_per_class;
-    /** Maximum mask size */
-    uint32_t max_mask_size;
+    /** Maximum accumulated mask size for all of the detections in a frame.
+     *  Used only with 'HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK' format order.
+     *  The default value is (`input_image_size` * 2).
+     */
+    uint32_t max_accumulated_mask_size;
 } hailo_nms_shape_t;
 #pragma pack(push, 1)
@@ -1354,7 +1359,7 @@ typedef struct {
    /**
     * Status of the async transfer:
     * - ::HAILO_SUCCESS - The transfer is complete.
-     * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation).
+     * - ::HAILO_STREAM_ABORT - The transfer was canceled (can happen after network deactivation).
     * - Any other ::hailo_status on unexpected errors.
     */
    hailo_status status;
@@ -1382,7 +1387,7 @@ typedef struct {
    /**
     * Status of the async transfer:
     * - ::HAILO_SUCCESS - The transfer is complete.
-     * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation).
+     * - ::HAILO_STREAM_ABORT - The transfer was canceled (can happen after network deactivation).
     * - Any other ::hailo_status on unexpected errors.
     */
    hailo_status status;
@@ -1625,7 +1630,7 @@ typedef struct {
 typedef struct {
    uint8_t network_group_index;
    uint16_t batch_index;
-    uint8_t context_index;
+    uint16_t context_index;
    uint16_t action_index;
 } hailo_context_switch_breakpoint_reached_message_t;
@@ -1643,7 +1648,7 @@ typedef struct {
    uint32_t exit_status;
    uint8_t network_group_index;
    uint16_t batch_index;
-    uint8_t context_index;
+    uint16_t context_index;
    uint16_t action_index;
 } hailo_context_switch_run_time_error_message_t;
@@ -2811,7 +2816,7 @@ HAILORTAPI hailo_status hailo_network_group_get_output_stream_infos(hailo_config
 /**
 * Shutdown a given network group. Makes sure all ongoing async operations are canceled. All async callbacks
- * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORTED_BY_USER.
+ * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORT.
 * Any resources attached to the network group may be released after function returns.
 *
 * @param[in]  network_group              NetworkGroup to be shutdown.
@@ -2878,16 +2883,16 @@ HAILORTAPI hailo_status hailo_get_latency_measurement(hailo_configured_network_g
    const char *network_name, hailo_latency_measurement_result_t *result);
 /**
- * Sets the maximum time period that may pass before getting run time from the scheduler,
- * even without reaching the minimum required send requests (e.g. threshold - see hailo_set_scheduler_threshold()),
- * as long as at least one send request has been sent.
- * This time period is measured since the last time the scheduler gave this network group run time.
+ * Sets the maximum time period that may pass before receiving run time from the scheduler.
+ * The timeout applies as long as at least one send request has been sent; there is no minimum number of
+ * send requests required (for a minimum, see hailo_set_scheduler_threshold()).
 *
 * @param[in]  configured_network_group   NetworkGroup for which to set the scheduler timeout.
 * @param[in]  timeout_ms                 Timeout in milliseconds.
 * @param[in]  network_name               Network name for which to set the timeout.
 *                                        If NULL is passed, the timeout will be set for all the networks in the network group.
 * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ * @note The new time period is measured from the last time the scheduler allocated run time to this network group.
 * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
 * @note The default timeout is 0ms.
 * @note Currently, setting the timeout for a specific network is not supported.
@@ -2942,13 +2947,86 @@ HAILORTAPI hailo_status hailo_set_scheduler_priority(hailo_configured_network_gr
 // Free returned buffer via hailo_free_buffer
 HAILORTAPI hailo_status hailo_allocate_buffer(size_t size, const hailo_buffer_parameters_t *allocation_params, void **buffer_out);
 HAILORTAPI hailo_status hailo_free_buffer(void *buffer);
-// Maps buffer to dma. Free mapping by calling hailo_dma_unmap_buffer_from_device and then free buffer as needed
-// If buffer has already been mapped to device, then HAILO_DMA_MAPPING_ALREADY_EXISTS shall be returned
-HAILORTAPI hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction);
-HAILORTAPI hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction);
 // ************************************** NOTE - END ************************************** //
 // Dma buffer allocation isn't currently supported and is for internal use only             //
 // **************************************************************************************** //
+
+/**
+ * Maps the buffer pointed to by @a address for DMA transfers to/from the given @a device, in the specified
+ * @a direction.
+ * DMA mapping of buffers in advance may improve the performance of the async API. This improvement will become
+ * apparent when the buffer is reused multiple times across different async operations.
+ * For low level API (aka ::hailo_input_stream or ::hailo_output_stream), buffers passed to
+ * ::hailo_stream_write_raw_buffer_async and ::hailo_stream_read_raw_buffer_async can be mapped.
+ *
+ * @param[in] device        A ::hailo_device object.
+ * @param[in] address       The address of the buffer to be mapped
+ * @param[in] size          The buffer's size in bytes
+ * @param[in] direction     The direction of the mapping. For input streams, use `HAILO_DMA_BUFFER_DIRECTION_H2D`
+ *                          and for output streams, use `HAILO_DMA_BUFFER_DIRECTION_D2H`.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ *
+ * @note The DMA mapping will be released upon calling ::hailo_device_dma_unmap_buffer with @a address, @a size and
+ *       @a direction, or when the @a device object is destroyed.
+ * @note The buffer pointed to by @a address cannot be released until it is unmapped (via
+ *       ::hailo_device_dma_unmap_buffer or ::hailo_release_device).
+ */
+HAILORTAPI hailo_status hailo_device_dma_map_buffer(hailo_device device, void *address, size_t size,
+    hailo_dma_buffer_direction_t direction);
+
+/**
+ * Un-maps the buffer pointed to by @a address for DMA transfers to/from the given @a device, in the direction
+ * @a direction.
+ *
+ * @param[in] device        A ::hailo_device object.
+ * @param[in] address       The address of the buffer to be un-mapped.
+ * @param[in] size          The buffer's size in bytes.
+ * @param[in] direction     The direction of the mapping.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_device_dma_unmap_buffer(hailo_device device, void *address, size_t size,
+    hailo_dma_buffer_direction_t direction);
+
+/**
+ * Maps the buffer pointed to by @a address for DMA transfers to/from the given @a vdevice, in the specified
+ * @a direction.
+ * DMA mapping of buffers in advance may improve the performance of the async API. This improvement will become
+ * apparent when the buffer is reused multiple times across different async operations.
+ * For low level API (aka ::hailo_input_stream or ::hailo_output_stream), buffers passed to
+ * ::hailo_stream_write_raw_buffer_async and ::hailo_stream_read_raw_buffer_async can be mapped.
+ *
+ * @param[in] vdevice       A ::hailo_vdevice object.
+ * @param[in] address       The address of the buffer to be mapped
+ * @param[in] size          The buffer's size in bytes
+ * @param[in] direction     The direction of the mapping. For input streams, use `HAILO_DMA_BUFFER_DIRECTION_H2D`
+ *                          and for output streams, use `HAILO_DMA_BUFFER_DIRECTION_D2H`.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ *
+ * @note The DMA mapping will be released upon calling ::hailo_vdevice_dma_unmap_buffer with @a address, @a size and
+ *       @a direction, or when the @a vdevice object is destroyed.
+ * @note The buffer pointed to by @a address cannot be released until it is unmapped (via
+ *       ::hailo_vdevice_dma_unmap_buffer or ::hailo_release_vdevice).
+ */
+HAILORTAPI hailo_status hailo_vdevice_dma_map_buffer(hailo_vdevice vdevice, void *address, size_t size,
+    hailo_dma_buffer_direction_t direction);
+
+/**
+ * Un-maps the buffer pointed to by @a address for DMA transfers to/from the given @a vdevice, in the direction
+ * @a direction.
+ *
+ * @param[in] vdevice       A ::hailo_vdevice object.
+ * @param[in] address       The address of the buffer to be un-mapped.
+ * @param[in] size          The buffer's size in bytes.
+ * @param[in] direction     The direction of the mapping.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
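For the C API, the same pre-mapping pattern might look as follows; a minimal sketch assuming an already-created `device` and a host buffer reused across several raw async reads:

```cpp
#include "hailo/hailort.h"

hailo_status read_with_pre_mapped_buffer(hailo_device device, void *buffer, size_t size)
{
    // Map once; the mapping is reused by every raw async read on `buffer`.
    hailo_status status = hailo_device_dma_map_buffer(device, buffer, size,
        HAILO_DMA_BUFFER_DIRECTION_D2H);
    if (HAILO_SUCCESS != status) {
        return status;
    }

    // ... reuse `buffer` across hailo_stream_read_raw_buffer_async() calls ...

    // Unmap with the same address, size and direction that were mapped.
    return hailo_device_dma_unmap_buffer(device, buffer, size, HAILO_DMA_BUFFER_DIRECTION_D2H);
}
```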
+ */ +HAILORTAPI hailo_status hailo_vdevice_dma_unmap_buffer(hailo_vdevice vdevice, void *address, size_t size, + hailo_dma_buffer_direction_t direction); + /** @} */ // end of group_buffer_functions /** @defgroup group_stream_functions Stream functions @@ -3695,6 +3773,7 @@ HAILORTAPI hailo_status hailo_vstream_write_raw_buffer(hailo_input_vstream input * pointers to the planes to where the data to * be sent to the device is stored. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note Currently only support memory_type field of buffer to be HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR. */ HAILORTAPI hailo_status hailo_vstream_write_pix_buffer(hailo_input_vstream input_vstream, const hailo_pix_buffer_t *buffer); diff --git a/hailort/libhailort/include/hailo/hailort.hpp b/hailort/libhailort/include/hailo/hailort.hpp index 1c85ac02..cb842657 100644 --- a/hailort/libhailort/include/hailo/hailort.hpp +++ b/hailort/libhailort/include/hailo/hailort.hpp @@ -30,5 +30,6 @@ #include "hailo/network_rate_calculator.hpp" #include "hailo/quantization.hpp" #include "hailo/hailort_defaults.hpp" +#include "hailo/dma_mapped_buffer.hpp" #endif /* _HAILORT_HPP_ */ diff --git a/hailort/libhailort/include/hailo/hailort_common.hpp b/hailort/libhailort/include/hailo/hailort_common.hpp index 203c9a63..85076f3a 100644 --- a/hailort/libhailort/include/hailo/hailort_common.hpp +++ b/hailort/libhailort/include/hailo/hailort_common.hpp @@ -12,6 +12,7 @@ #include "hailo/hailort.h" #include "hailo/expected.hpp" +#include "hailo/buffer.hpp" #include #include @@ -34,7 +35,7 @@ class HAILORTAPI HailoRTCommon final static_assert(sizeof(hailo_bbox_t) / sizeof(uint16_t) == sizeof(hailo_bbox_float32_t) / sizeof(float32_t), "Mismatch bbox params size"); static const uint32_t BBOX_PARAMS = sizeof(hailo_bbox_t) / sizeof(uint16_t); - static const uint32_t MASK_PARAMS = 1; // mask_size + static const uint32_t DETECTION_WITH_BYTE_MASK_SIZE = sizeof(hailo_detection_with_byte_mask_t); static const uint32_t MAX_DEFUSED_LAYER_COUNT = 9; static const size_t HW_DATA_ALIGNMENT = 8; static const uint32_t MUX_INFO_COUNT = 32; @@ -84,12 +85,17 @@ class HAILORTAPI HailoRTCommon final * @param[in] alignment Returned number should be aligned to this parameter. * @return aligned number */ - template - static constexpr T align_to(T num, T alignment) { + template + static constexpr T align_to(T num, U alignment) { auto remainder = num % alignment; return remainder == 0 ? num : num + (alignment - remainder); } + static void *align_to(void *addr, size_t alignment) + { + return reinterpret_cast(align_to(reinterpret_cast(addr), alignment)); + } + /** * Gets the shape size. * @@ -241,7 +247,7 @@ class HAILORTAPI HailoRTCommon final case HAILO_FORMAT_ORDER_HAILO_YYYYUV: return "YYYYUV"; case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: - return "HAILO NMS WITH METADATA"; + return "HAILO NMS WITH BYTE MASK"; default: return "Nan"; } @@ -280,23 +286,18 @@ class HAILORTAPI HailoRTCommon final static uint32_t get_nms_host_frame_size(const hailo_nms_shape_t &nms_shape, const hailo_format_t &format); /** - * Gets HAILO_NMS_WITH_BYTE_MASK host shape size in bytes by nms_shape and buffer format. + * Gets `HAILO_NMS_WITH_BYTE_MASK` host frame size in bytes by nms_shape. * * @param[in] nms_shape The NMS shape to get size from. - * @param[in] format A ::hailo_format_t object. - * @return The HAILO_NMS_WITH_BYTE_MASK host shape size. + * @return The HAILO_NMS_WITH_BYTE_MASK host frame size. 
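The reworked frame-size computation (its body is shown just below) reduces to `number_of_classes * max_bboxes_per_class * DETECTION_WITH_BYTE_MASK_SIZE + max_accumulated_mask_size`. A worked example with illustrative values; the 16-byte detection size stands in for `DETECTION_WITH_BYTE_MASK_SIZE` and is an assumption, not the real `sizeof`:

```cpp
#include <cstdint>

constexpr uint32_t number_of_classes         = 80;
constexpr uint32_t max_bboxes_per_class      = 100;
constexpr uint32_t detection_size            = 16;            // assumed sizeof(hailo_detection_with_byte_mask_t)
constexpr uint32_t max_accumulated_mask_size = 640 * 640 * 2; // default: input_image_size * 2

constexpr uint32_t max_detections = number_of_classes * max_bboxes_per_class; // 8,000
constexpr uint32_t frame_size = (max_detections * detection_size)             // 128,000
                                + max_accumulated_mask_size;                  // + 819,200 = 947,200 bytes
```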
*/ - static constexpr uint32_t get_nms_with_byte_mask_host_shape_size(const hailo_nms_shape_t &nms_shape, const hailo_format_t &format) + static constexpr uint32_t get_nms_with_byte_mask_host_frame_size(const hailo_nms_shape_t &nms_shape) { - // Assuming 1 byte per pixel for the mask - auto bbox_size = BBOX_PARAMS + MASK_PARAMS + nms_shape.max_mask_size; - const uint32_t size_per_class = 1 + (bbox_size * nms_shape.max_bboxes_per_class); - double shape_size = size_per_class * nms_shape.number_of_classes; - if ((shape_size * get_format_data_bytes(format)) < UINT32_MAX) { - return static_cast(shape_size); - } else { - return UINT32_MAX / get_format_data_bytes(format); - } + // TODO: HRT-12035 - Change `max_bboxes_per_class` to `max_boxes` + auto max_detections = nms_shape.number_of_classes * nms_shape.max_bboxes_per_class; + auto max_detections_size = max_detections * DETECTION_WITH_BYTE_MASK_SIZE; + auto frame_size = max_detections_size + nms_shape.max_accumulated_mask_size; + return frame_size; } /** @@ -411,8 +412,16 @@ class HAILORTAPI HailoRTCommon final return ((HAILO_FORMAT_ORDER_HAILO_NMS == order) || (HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == order)); } + // TODO HRT-10073: change to supported features list + static bool is_hailo1x_device_type(const hailo_device_architecture_t dev_arch) + { + // Compare with HAILO1X device archs + return (HAILO_ARCH_HAILO15H == dev_arch) || (HAILO_ARCH_HAILO15M == dev_arch) || (HAILO_ARCH_PLUTO == dev_arch); + } + static Expected to_device_id(const std::string &device_id); static Expected> to_device_ids_vector(const std::vector &device_ids_str); + static Expected as_hailo_pix_buffer(MemoryView &memory_view, hailo_format_order_t order); }; #ifndef HAILO_EMULATOR diff --git a/hailort/libhailort/include/hailo/hef.hpp b/hailort/libhailort/include/hailo/hef.hpp index 3ddd8b2b..b44e6289 100644 --- a/hailort/libhailort/include/hailo/hef.hpp +++ b/hailort/libhailort/include/hailo/hef.hpp @@ -22,6 +22,9 @@ namespace hailort { +#define DEFAULT_NMS_NO_BURST_SIZE (1) +#define DEFAULT_ACTUAL_BATCH_SIZE (1) + /*! Hailo configure parameters per network_group. Analogical to hailo_configure_network_group_params_t */ struct ConfigureNetworkParams { @@ -466,6 +469,7 @@ class HAILORTAPI Hef final friend class ConfiguredNetworkGroupBase; friend class CoreOp; friend class VDeviceBase; + friend class InferModel; #ifdef HAILO_SUPPORT_MULTI_PROCESS friend class HailoRtRpcClient; diff --git a/hailort/libhailort/include/hailo/infer_model.hpp b/hailort/libhailort/include/hailo/infer_model.hpp index 258cc895..c92995e8 100644 --- a/hailort/libhailort/include/hailo/infer_model.hpp +++ b/hailort/libhailort/include/hailo/infer_model.hpp @@ -23,6 +23,8 @@ namespace hailort class ConfiguredInferModelImpl; class AsyncInferRunnerImpl; + +/*! Asynchronous inference job representation is used to manage and control an inference job that is running asynchronously. */ class HAILORTAPI AsyncInferJob { public: @@ -34,7 +36,19 @@ class HAILORTAPI AsyncInferJob AsyncInferJob(AsyncInferJob &&other); AsyncInferJob &operator=(AsyncInferJob &&other); + /** + * Waits for the asynchronous inference job to finish. + * + * @param[in] timeout The maximum time to wait. + * + * @return A ::hailo_status indicating the status of the operation. + * If the job finishes successfully within the timeout, ::HAILO_SUCCESS is returned. Otherwise, returns a ::hailo_status error + **/ hailo_status wait(std::chrono::milliseconds timeout); + + /** + * Detaches the job. 
Without detaching, the job's destructor will block until the job finishes. + **/ void detach(); private: @@ -47,35 +61,115 @@ class HAILORTAPI AsyncInferJob }; struct AsyncInferCompletionInfo; + +static const auto ASYNC_INFER_EMPTY_CALLBACK = [](const AsyncInferCompletionInfo&) {}; + +/*! Configured infer_model that can be used to perform an asynchronous inference */ class HAILORTAPI ConfiguredInferModel { public: ConfiguredInferModel() = default; + /** Represents an asynchronous infer request - holds the input and output buffers of the request */ class HAILORTAPI Bindings { public: Bindings() = default; + /** Holds the input and output buffers of the Bindings infer request */ class HAILORTAPI InferStream { public: + /** + * Sets the edge's buffer to a new one, of type MemoryView. + * + * @param[in] view The new buffer to be set. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + */ hailo_status set_buffer(MemoryView view); + + /** + * @return Upon success, returns Expected of the MemoryView buffer of the edge. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note If buffer type is not MemoryView, will return ::HAILO_INVALID_OPERATION. + */ Expected get_buffer(); + + /** + * Sets the edge's buffer to a new one, of type hailo_pix_buffer_t. + * + * @param[in] pix_buffer The new buffer to be set. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note Supported only for inputs. + * @note Currently only support memory_type field of buffer to be HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR. + */ hailo_status set_pix_buffer(const hailo_pix_buffer_t &pix_buffer); + + /** + * @return Upon success, returns Expected of the ::hailo_pix_buffer_t buffer of the edge. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note If buffer type is not ::hailo_pix_buffer_t, will return ::HAILO_INVALID_OPERATION. + */ Expected get_pix_buffer(); + /** + * Sets the edge's buffer from a DMA buffer. + * + * @param[in] dma_buffer The new buffer to be set. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note Supported on Linux only. + */ + hailo_status set_dma_buffer(hailo_dma_buffer_t dma_buffer); + + /** + * @return Upon success, returns Expected of the ::hailo_dma_buffer_t buffer of the edge. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note If buffer type is not ::hailo_dma_buffer_t, will return ::HAILO_INVALID_OPERATION. + * @note Supported on Linux only. + */ + Expected get_dma_buffer(); + private: friend class ConfiguredInferModelImpl; + friend class AsyncInferRunnerImpl; class Impl; InferStream(std::shared_ptr pimpl); std::shared_ptr m_pimpl; }; + /** + * Returns the single input's InferStream object. + * + * @return Upon success, returns Expected of the single input's InferStream object. Otherwise, returns Unexpected of ::hailo_status error. + * @note If Bindings has multiple inputs, will return ::HAILO_INVALID_OPERATION. + * In that case - use input(const std::string &name) instead. + */ Expected input(); + + /** + * Returns the single output's InferStream object. + * + * @return Upon success, returns Expected of the single output's InferStream object. Otherwise, returns Unexpected of ::hailo_status error. + * @note If Bindings has multiple outputs, will return ::HAILO_INVALID_OPERATION. + * In that case - use output(const std::string &name) instead. + */ Expected output(); + + /** + * Gets an input's InferStream object. 
+ * + * @param[in] name The name of the input edge. + * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error. + */ Expected input(const std::string &name); + + /** + * Gets an output's InferStream object. + * + * @param[in] name The name of the output edge. + * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error. + */ Expected output(const std::string &name); private: @@ -88,19 +182,121 @@ class HAILORTAPI ConfiguredInferModel std::unordered_map m_outputs; }; + /** + * Creates a Bindings object. + * + * @return Upon success, returns Expected of Bindings. Otherwise, returns Unexpected of ::hailo_status error. + */ Expected create_bindings(); + + /** + * Waits until the model is ready to launch a new asynchronous inference operation. + * The readiness of the model is determined by the ability to push buffers to the asynchronous inference pipeline. + * + * @param[in] timeout Amount of time to wait until the model is ready in milliseconds. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If @a timeout has passed and the model is not ready, returns ::HAILO_TIMEOUT. + * - In any other error case, returns ::hailo_status error. + */ hailo_status wait_for_async_ready(std::chrono::milliseconds timeout); + + /** + * Activates hailo device inner-resources for context_switch inference. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns ::hailo_status error. + * @note Calling this function is invalid in case scheduler is enabled. + */ hailo_status activate(); + + /** + * Deactivates hailo device inner-resources for context_switch inference. + * @note Calling this function is invalid in case scheduler is enabled. + */ void deactivate(); + + /** + * Launches a synchronous inference operation with the provided bindings. + * + * @param[in] bindings The bindings for the inputs and outputs of the model. + * @param[in] timeout The maximum amount of time to wait for the inference operation to complete. + * + * @return Upon success, returns ::HAILO_SUCCESS. + * Otherwise, returns Unexpected of ::hailo_status error. + */ hailo_status run(Bindings bindings, std::chrono::milliseconds timeout); + + /** + * Launches an asynchronous inference operation with the provided bindings. + * The completion of the operation is notified through the provided callback function. + * + * @param[in] bindings The bindings for the inputs and outputs of the model. + * @param[in] callback The function to be called upon completion of the asynchronous inference operation. + * + * @return Upon success, returns an instance of Expected representing the launched job. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note @a callback should execute as quickly as possible. + */ Expected run_async(Bindings bindings, - std::function callback = [] (const AsyncInferCompletionInfo &) {}); - Expected get_hw_latency_measurement(const std::string &network_name = ""); + std::function callback = ASYNC_INFER_EMPTY_CALLBACK); + + /** + * @return Upon success, returns Expected of LatencyMeasurementResult object containing the output latency result. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + Expected get_hw_latency_measurement(); + + /** + * Sets the maximum time period that may pass before receiving run time from the scheduler. 
+     * The timeout applies as long as at least one send request has been sent; there is no minimum number of
+     * send requests required (for a minimum, see set_scheduler_threshold()).
+     *
+     * @param[in] timeout           Timeout in milliseconds.
+     *
+     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+     * @note The new time period is measured from the last time the scheduler allocated run time to this network group.
+     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
+     * @note The default timeout is 0ms.
+     */
    hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout);
+
+    /**
+     * Sets the minimum number of send requests required before the network is considered ready to get run time from the scheduler.
+     *
+     * @param[in] threshold         Threshold in number of frames.
+     *
+     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
+     * @note The default threshold is 1.
+     * @note If at least one send request has been sent, but the threshold is not reached within a set time period (e.g. timeout - see
+     *       set_scheduler_timeout()), the scheduler will consider the network ready regardless.
+     */
    hailo_status set_scheduler_threshold(uint32_t threshold);
+
+    /**
+     * Sets the priority of the network.
+     * When the scheduler chooses the next network to run, networks with a higher priority are preferred.
+     * A bigger number represents a higher priority.
+     *
+     * @param[in] priority          Priority as a number between HAILO_SCHEDULER_PRIORITY_MIN and HAILO_SCHEDULER_PRIORITY_MAX.
+     *
+     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
+     * @note The default priority is HAILO_SCHEDULER_PRIORITY_NORMAL.
+     */
    hailo_status set_scheduler_priority(uint8_t priority);
+
+    /**
+     * @return Upon success, returns Expected of the number of inferences that can be queued simultaneously for execution.
+     *         Otherwise, returns Unexpected of ::hailo_status error.
+     */
    Expected get_async_queue_size();
+    /**
+     * Shuts the inference down. After calling this method, the model is no longer usable.
+     */
+    void shutdown();
+
 private:
    friend class InferModel;
@@ -109,39 +305,134 @@
    std::shared_ptr m_pimpl;
 };
+/**
+ * Context passed to the callback function after the asynchronous inference operation has completed or failed.
+ */
 struct HAILORTAPI AsyncInferCompletionInfo
 {
-    AsyncInferCompletionInfo(ConfiguredInferModel::Bindings _bindings, hailo_status _status) : bindings(_bindings), status(_status)
+    /**
+     * Constructor for AsyncInferCompletionInfo.
+     *
+     * @param[in] _status The status of the inference operation.
+     */
+    AsyncInferCompletionInfo(hailo_status _status) : status(_status)
    {
    }
-    ConfiguredInferModel::Bindings bindings;
+    /**
+     * Status of the asynchronous inference operation.
+     * - ::HAILO_SUCCESS - When the inference operation completes successfully.
+     * - Any other ::hailo_status on unexpected errors.
+     */
    hailo_status status;
 };
+/**
+ * Contains all of the necessary information for configuring the network for inference.
+ * This class is used to set up the model for inference and includes methods for setting and getting the model's parameters.
+ * By calling the configure function, the user can create a ConfiguredInferModel object, which is used to run inference.
+ */
 class HAILORTAPI InferModel final
 {
 public:
    ~InferModel() = default;
+    /**
+     * Represents the parameters of a stream.
+     * By default, the stream's parameters are set to the default values of the model.
+     * The user can change the stream's parameters by calling the set_ functions.
+     */
    class HAILORTAPI InferStream
    {
    public:
-        // TODO: explain that the getters return what the user defined with set_ functions
+        /**
+         * @return The name of the stream.
+         */
        const std::string name() const;
+
+        /**
+         * @return The shape of the image that the stream will use for inference.
+         */
        hailo_3d_image_shape_t shape() const;
+
+        /**
+         * @return The format that the stream will use for inference.
+         */
        hailo_format_t format() const;
+
+        /**
+         * @return The size in bytes of a frame that the stream will use for inference.
+         */
        size_t get_frame_size() const;
+
+        /**
+         * @return Upon success, an Expected of hailo_nms_shape_t, the NMS shape for the stream.
+         *         Otherwise, returns Unexpected of ::hailo_status error.
+         * @note In case NMS is disabled, returns an Unexpected of ::HAILO_INVALID_OPERATION.
+         */
        Expected get_nms_shape() const;
-
+
+        /**
+         * Sets the format type of the stream.
+         * This method is used to specify the format type that the stream will use for inference.
+         *
+         * @param[in] type The format type to be set for the stream. This should be a value of the hailo_format_type_t enum.
+         */
        void set_format_type(hailo_format_type_t type);
+
+        /**
+         * Sets the format order of the stream.
+         * This method is used to specify the format order that the stream will use for inference.
+         *
+         * @param[in] order The format order to be set for the stream. This should be a value of the hailo_format_order_t enum.
+         */
        void set_format_order(hailo_format_order_t order);
+
+        /**
+         * Retrieves the quantization information for all layers in the model.
+         * @return A vector of hailo_quant_info_t structures, each representing the quantization information for a layer in the model.
+         */
        std::vector get_quant_infos() const;
+
+        /**
+         * Checks if Non-Maximum Suppression (NMS) is enabled for the model.
+         *
+         * @return True if NMS is enabled, false otherwise.
+         */
        bool is_nms() const;
+
+        /**
+         * Set NMS score threshold, used for filtering out candidates. Any box with score below the threshold
+         * is suppressed.
+         *
+         * @param[in] threshold        NMS score threshold to set.
+         */
+        void set_nms_score_threshold(float32_t threshold);
+
    private:
        friend class InferModel;
        class Impl;
        InferStream(std::shared_ptr pimpl);
        std::shared_ptr m_pimpl;
    };
+    /**
+     * @return A constant reference to the Hef object associated with this InferModel.
+     */
    const Hef &hef() const;
+
+    /**
+     * Sets the batch size of the InferModel.
+     *
+     * @param[in] batch_size      The new batch size to be set.
+     */
    void set_batch_size(uint16_t batch_size);
+
+    /**
+     * Sets the power mode of the InferModel.
+     * See ::hailo_power_mode_t for more information.
+     *
+     * @param[in] power_mode      The new power mode to be set.
+     */
    void set_power_mode(hailo_power_mode_t power_mode);
+
+    /**
+     * Sets the latency measurement flags of the InferModel.
+     * See ::hailo_latency_measurement_flags_t for more information.
+     *
+     * @param[in] latency         The new latency measurement flags to be set.
+     */
    void set_hw_latency_measurement_flags(hailo_latency_measurement_flags_t latency);
-    Expected configure(const std::string &network_name = "");
+    /**
+     * Configures the InferModel object. Also checks the validity of the configuration's formats.
+     *
+     * @return Upon success, returns Expected of ConfiguredInferModel, which can be used to perform an asynchronous inference.
+     *         Otherwise, returns Unexpected of ::hailo_status error.
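Putting the pieces above together, here is a minimal end-to-end sketch of the configure-bind-run flow. The path "model.hef", the single-input/single-output assumption, and the 1-second timeout are illustrative only:

```cpp
#include "hailo/hailort.hpp"
#include <chrono>

using namespace hailort;
using namespace std::chrono_literals;

hailo_status run_one_async_inference(VDevice &vdevice,
    void *input_data, size_t input_size, void *output_data, size_t output_size)
{
    auto infer_model_exp = vdevice.create_infer_model("model.hef");
    if (!infer_model_exp.has_value()) {
        return infer_model_exp.status();
    }
    auto infer_model = infer_model_exp.release();

    auto configured_exp = infer_model->configure();
    if (!configured_exp.has_value()) {
        return configured_exp.status();
    }
    auto configured = configured_exp.release();

    auto bindings_exp = configured.create_bindings();
    if (!bindings_exp.has_value()) {
        return bindings_exp.status();
    }
    auto bindings = bindings_exp.release();

    // Bind the user buffers; for multi-edge models use input(name)/output(name) instead.
    auto status = bindings.input().value().set_buffer(MemoryView(input_data, input_size));
    if (HAILO_SUCCESS != status) {
        return status;
    }
    status = bindings.output().value().set_buffer(MemoryView(output_data, output_size));
    if (HAILO_SUCCESS != status) {
        return status;
    }

    auto job_exp = configured.run_async(bindings);
    if (!job_exp.has_value()) {
        return job_exp.status();
    }
    auto job = job_exp.release();

    return job.wait(1s); // block until this inference completes (or times out)
}
```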
+ * @note InferModel can be configured once. + */ + Expected configure(); + + /** + * Returns the single input's InferStream object. + * + * @return Upon success, returns Expected of the single input's InferStream object. Otherwise, returns Unexpected of ::hailo_status error. + * @note If InferModel has multiple inputs, will return ::HAILO_INVALID_OPERATION. + * In that case - use input(const std::string &name) instead. + */ Expected input(); + + /** + * Returns the single output's InferStream object. + * + * @return Upon success, returns Expected of the single output's InferStream object. Otherwise, returns Unexpected of ::hailo_status error. + * @note If InferModel has multiple outputs, will return ::HAILO_INVALID_OPERATION. + * In that case - use output(const std::string &name) instead. + */ Expected output(); + + /** + * Gets an input's InferStream object. + * + * @param[in] name The name of the input edge. + * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error. + */ Expected input(const std::string &name); + + /** + * Gets an output's InferStream object. + * + * @param[in] name The name of the output edge. + * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error. + */ Expected output(const std::string &name); + + /** + * @return A constant reference to the vector of input InferStream objects, each representing an input edge. + */ const std::vector &inputs() const; + + /** + * @return A constant reference to the vector of output InferStream objects, each representing an output edge. + */ const std::vector &outputs() const; + + /** + * @return A constant reference to a vector of strings, each representing the name of an input stream. + */ const std::vector &get_input_names() const; + + /** + * @return A constant reference to a vector of strings, each representing the name of an output stream. + */ const std::vector &get_output_names() const; - + InferModel(InferModel &&); Expected configure_for_ut(std::shared_ptr async_infer_runner, - const std::vector &input_names, const std::vector &output_names); + const std::vector &input_names, const std::vector &output_names, + std::shared_ptr net_group = nullptr); private: friend class VDevice; diff --git a/hailort/libhailort/include/hailo/inference_pipeline.hpp b/hailort/libhailort/include/hailo/inference_pipeline.hpp index a6811b26..e9b5be10 100644 --- a/hailort/libhailort/include/hailo/inference_pipeline.hpp +++ b/hailort/libhailort/include/hailo/inference_pipeline.hpp @@ -106,10 +106,23 @@ class HAILORTAPI InferVStreams final * * @param[in] max_proposals_per_class NMS max proposals per class to set. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note This function will fail in cases where there is no output with NMS operations on the CPU. + * @note This function must be called before starting inference! + * This function will fail in cases where there is no output with NMS operations on the CPU. */ hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class); + /** + * Set maximum accumulated mask size for all the detections in a frame. + * + * Note: Used in order to change the output buffer frame size, + * in cases where the output buffer is too small for all the segmentation detections. + * + * @param[in] max_accumulated_mask_size NMS max accumulated mask size. + * @note This function must be called before starting inference! 
+     * This function will fail in cases where the output vstream has no NMS operations on the CPU.
+     */
+    hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size);
+
    InferVStreams(const InferVStreams &other) = delete;
    InferVStreams &operator=(const InferVStreams &other) = delete;
    InferVStreams &operator=(InferVStreams &&other) = delete;
diff --git a/hailort/libhailort/include/hailo/network_group.hpp b/hailort/libhailort/include/hailo/network_group.hpp
index 9b765476..6d5e1708 100644
--- a/hailort/libhailort/include/hailo/network_group.hpp
+++ b/hailort/libhailort/include/hailo/network_group.hpp
@@ -66,7 +66,7 @@ struct HwInferResults {
 };
 /*@}*/
-using src_context_t = uint8_t;
+using src_context_t = uint16_t;
 using src_stream_index_t = uint8_t;
 using IntermediateBufferKey = std::pair;
@@ -239,7 +239,7 @@ class HAILORTAPI ConfiguredNetworkGroup
    /**
     * Shutdown the network group. Makes sure all ongoing async operations are canceled. All async callbacks
-     * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORTED_BY_USER.
+     * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORT.
     * Any resources attached to the network group may be released after function returns.
     *
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
@@ -348,15 +348,15 @@ class HAILORTAPI ConfiguredNetworkGroup
    virtual bool is_scheduled() const = 0;
    /**
-     * Sets the maximum time period that may pass before getting run time from the scheduler,
-     * even without reaching the minimum required send requests (e.g. threshold - see set_scheduler_threshold()),
-     * as long as at least one send request has been sent.
-     * This time period is measured since the last time the scheduler gave this network group run time.
+     * Sets the maximum time period that may pass before receiving run time from the scheduler.
+     * The timeout applies as long as at least one send request has been sent; there is no minimum number of
+     * send requests required (for a minimum, see set_scheduler_threshold()).
     *
     * @param[in]  timeout              Timeout in milliseconds.
     * @param[in]  network_name         Network name for which to set the timeout.
     *                                  If not passed, the timeout will be set for all the networks in the network group.
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+     * @note The new time period is measured from the last time the scheduler allocated run time to this network group.
     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
     * @note The default timeout is 0ms.
     * @note Currently, setting the timeout for a specific network is not supported.
@@ -365,8 +365,6 @@ class HAILORTAPI ConfiguredNetworkGroup
    /**
     * Sets the minimum number of send requests required before the network is considered ready to get run time from the scheduler.
-     * If at least one send request has been sent, but the threshold is not reached within a set time period (e.g. timeout - see hailo_set_scheduler_timeout()),
-     * the scheduler will consider the network ready regardless.
     *
     * @param[in]  threshold            Threshold in number of frames.
     * @param[in]  network_name         Network name for which to set the threshold.
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
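As a usage sketch of the scheduler controls documented here (assuming a network group configured with the scheduler enabled; the 4-frame threshold and 100ms timeout are illustrative):

```cpp
#include "hailo/hailort.hpp"
#include <chrono>

using namespace hailort;
using namespace std::chrono_literals;

hailo_status tune_scheduler(ConfiguredNetworkGroup &network_group)
{
    // Consider the network group ready once 4 send requests are pending...
    auto status = network_group.set_scheduler_threshold(4);
    if (HAILO_SUCCESS != status) {
        return status;
    }

    // ...but never hold a pending request for longer than 100ms (the timeout wins).
    return network_group.set_scheduler_timeout(100ms);
}
```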
* @note The default threshold is 1. + * @note If at least one send request has been sent, but the threshold is not reached within a set time period (e.g. timeout - see + * hailo_set_scheduler_timeout()), the scheduler will consider the network ready regardless. * @note Currently, setting the threshold for a specific network is not supported. */ virtual hailo_status set_scheduler_threshold(uint32_t threshold, const std::string &network_name="") = 0; @@ -429,14 +429,14 @@ class HAILORTAPI ConfiguredNetworkGroup const std::function &infer_request_done_cb) = 0; virtual Expected> get_ops_metadata() = 0; virtual Expected> get_layer_info(const std::string &stream_name) = 0; - hailo_status wait_for_callbacks_finish(); - hailo_status wait_for_callbacks_to_maintain_below_threshold(size_t threshold); + hailo_status wait_for_ongoing_callbacks_count_under(size_t threshold); void decrease_ongoing_callbacks(); void increase_ongoing_callbacks(); virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) = 0; virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) = 0; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) = 0; + virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) = 0; protected: ConfiguredNetworkGroup(); @@ -446,7 +446,7 @@ class HAILORTAPI ConfiguredNetworkGroup std::condition_variable m_cv; private: friend class ActivatedNetworkGroup; - friend class PipelineBuilder; + friend class AsyncAsyncPipelineBuilder; }; using ConfiguredNetworkGroupVector = std::vector>; diff --git a/hailort/libhailort/include/hailo/stream.hpp b/hailort/libhailort/include/hailo/stream.hpp index 3f21bdfd..5423ac17 100644 --- a/hailort/libhailort/include/hailo/stream.hpp +++ b/hailort/libhailort/include/hailo/stream.hpp @@ -43,7 +43,7 @@ class HAILORTAPI InputStream /** * Status of the async transfer. * - ::HAILO_SUCCESS - When transfer is complete successfully. - * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation). + * - ::HAILO_STREAM_ABORT - The transfer was canceled (can happen after network deactivation). * - Any other ::hailo_status on unexpected errors. */ hailo_status status; @@ -223,9 +223,6 @@ class HAILORTAPI InputStream */ virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0; - // The usage of BufferPtr for async API isn't currently supported and is for internal use only. - virtual hailo_status write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) = 0; - /** * @returns A ::hailo_stream_info_t object containing the stream's info. */ @@ -290,7 +287,7 @@ class HAILORTAPI OutputStream /** * Status of the async transfer. * - ::HAILO_SUCCESS - When transfer is complete successfully. - * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation). + * - ::HAILO_STREAM_ABORT - The transfer was canceled (can happen after network deactivation). * - Any other ::hailo_status on unexpected errors. */ hailo_status status; @@ -505,9 +502,6 @@ class HAILORTAPI OutputStream */ virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0; - // The usage of BufferPtr for async API isn't currently supported and is for internal use only. 
- virtual hailo_status read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) = 0; - // get_network_group_activated_event is same as this function virtual EventPtr &get_core_op_activated_event() = 0; protected: diff --git a/hailort/libhailort/include/hailo/vdevice.hpp b/hailort/libhailort/include/hailo/vdevice.hpp index fd8f40a2..726c42b8 100644 --- a/hailort/libhailort/include/hailo/vdevice.hpp +++ b/hailort/libhailort/include/hailo/vdevice.hpp @@ -55,7 +55,7 @@ class HAILORTAPI VDevice static Expected> create(const std::vector &device_ids); /** - * Configure the vdevice from an hef. + * Configures the vdevice from an hef. * * @param[in] hef A reference to an Hef object to configure the vdevice by. * @param[in] configure_params A map of configured network group name and parameters. @@ -64,8 +64,17 @@ class HAILORTAPI VDevice */ virtual Expected configure(Hef &hef, const NetworkGroupsParamsMap &configure_params={}) = 0; - - virtual Expected> create_infer_model(const std::string &hef_path); + + /** + * Creates the infer model from an hef + * + * @param[in] hef_path A string of an hef file. + * @param[in] network_name A string of the network name (optional). + * @return Upon success, returns Expected of a shared pointer of infer model. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + virtual Expected> create_infer_model(const std::string &hef_path, + const std::string &network_name = ""); /** * Gets the underlying physical devices. @@ -111,39 +120,43 @@ class HAILORTAPI VDevice */ Expected create_configure_params(Hef &hef, const std::string &network_group_name) const; - // TODO: Also link to async infer - ConfiguredInferModel, Bindings etc. Just like we did for - // InputStream::write_async and OutputStream::read_async (HRT-11039) /** * Maps the buffer pointed to by @a address for DMA transfers to/from this vdevice, in the specified * @a data_direction. - * DMA mapping of buffers in advance may improve the performance of `InputStream::write_async()` or - * `OutputStream::read_async()`. This improvement will be realized if the buffer is reused multiple times - * across different async operations. - * - For buffers that will be written to the vdevice via `InputStream::write_async()`, use `HAILO_H2D_STREAM` - * for the @a direction parameter. - * - For buffers that will be read from the vdevice via `OutputStream::read_async()`, use `HAILO_D2H_STREAM` - * for the @a direction parameter. + * DMA mapping of buffers in advance may improve the performance of async API. This improvement will become + * apparent when the buffer is reused multiple times across different async operations. + * + * For high level API (aka InferModel), buffers bound using ConfiguredInferModel::Bindings::InferStream::set_buffer + * can be mapped. + * + * For low level API (aka InputStream/OutputStream), buffers passed to InputStream::write_async and + * OutputStream::read_async can be mapped. + * + * @param[in] address The address of the buffer to be mapped. + * @param[in] size The buffer's size in bytes. + * @param[in] direction The direction of the mapping. For input streams, use `HAILO_DMA_BUFFER_DIRECTION_H2D` + * and for output streams, use `HAILO_DMA_BUFFER_DIRECTION_D2H`. * - * @param[in] address The address of the buffer to be mapped - * @param[in] size The buffer's size in bytes - * @param[in] direction The direction of the mapping * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. 
- * @note The DMA mapping will be freed upon calling dma_unmap() with @a address and @a data_direction, or when the - * @a VDevice object is destroyed. - * @note The buffer pointed to by @a address cannot be freed until it is unmapped (via dma_unmap() or @a VDevice + * + * @note The DMA mapping will be released upon calling dma_unmap() with @a address, @a size and @a data_direction, or + * when the @a VDevice object is destroyed. + * @note The buffer pointed to by @a address cannot be released until it is unmapped (via dma_unmap() or @a VDevice * destruction). */ - virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction); + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) = 0; /** * Un-maps the buffer pointed to by @a address for DMA transfers to/from this vdevice, in the direction * @a direction. * - @param[in] address The address of the buffer to be un-mapped - * @param[in] direction The direction of the mapping + * @param[in] address The address of the buffer to be un-mapped. + * @param[in] size The buffer's size in bytes. + * @param[in] direction The direction of the mapping. + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ - virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction); + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) = 0; virtual hailo_status before_fork(); virtual hailo_status after_fork_in_parent(); diff --git a/hailort/libhailort/include/hailo/vstream.hpp b/hailort/libhailort/include/hailo/vstream.hpp index 76d17f71..fd3cd30f 100644 --- a/hailort/libhailort/include/hailo/vstream.hpp +++ b/hailort/libhailort/include/hailo/vstream.hpp @@ -28,7 +28,7 @@ class HAILORTAPI InputVStream static Expected create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); InputVStream(InputVStream &&other) noexcept = default; InputVStream &operator=(InputVStream &&other) noexcept = default; @@ -50,6 +50,7 @@ class HAILORTAPI InputVStream * @param[in] buffer The buffer containing pointers to the planes where the data to * be sent to the device is stored. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note Currently, only buffers whose memory_type field is HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR are supported.
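The dma_map()/dma_unmap() change above (hailo_dma_buffer_direction_t instead of hailo_stream_direction_t, and @a size now required on unmap) pairs as in the following sketch, assuming a valid `vdevice` and a reusable input buffer of `frame_size` bytes:

    std::vector<uint8_t> buffer(frame_size);
    auto status = vdevice->dma_map(buffer.data(), buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
    // ... reuse the mapped buffer across many async transfers ...
    status = vdevice->dma_unmap(buffer.data(), buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);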
*/ hailo_status write(const hailo_pix_buffer_t &buffer); @@ -202,7 +203,7 @@ class HAILORTAPI OutputVStream const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, std::shared_ptr> &&pipeline_status, - EventPtr shutdown_event, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); + EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); OutputVStream(OutputVStream &&other) noexcept = default; OutputVStream &operator=(OutputVStream &&other) noexcept = default; virtual ~OutputVStream() = default; @@ -302,8 +303,8 @@ class HAILORTAPI OutputVStream /** * Gets a reference to a map between pipeline element names to their respective queue size accumulators. - * These accumulators measure the number of free buffers in the queue, right before a buffer is removed - * from the queue to be used. + * These accumulators measure the number of buffers in the queue, waiting to be processed downstream. + * The measurements take place right before we try to enqueue the next buffer. * * @return A const reference to a map between pipeline element names to their respective queue size accumulators. * @note Queue size accumulators are created for pipeline elements, if the vstream is created with the flag @@ -352,10 +353,23 @@ class HAILORTAPI OutputVStream * * @param[in] max_proposals_per_class NMS max proposals per class to set. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note This function will fail in cases where the output vstream has no NMS operations on the CPU. + * @note This function must be called before starting inference! + * This function will fail in cases where the output vstream has no NMS operations on the CPU. */ hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class); + /** + * Sets the maximum accumulated mask size for all the detections in a frame. + * + * Note: Used to change the output buffer frame size + * in cases where the output buffer is too small to hold all the segmentation detections. + * + * @param[in] max_accumulated_mask_size NMS max accumulated mask size. + * @note This function must be called before starting inference! + * This function will fail in cases where the output vstream has no NMS operations on the CPU.
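Both NMS setters must run before inference starts, as the notes above stress. A sketch with hypothetical values, assuming `output_vstream` performs NMS on the CPU:

    auto status = output_vstream.set_nms_max_proposals_per_class(50);
    if (HAILO_SUCCESS != status) { /* no CPU NMS op, or inference already started */ }
    status = output_vstream.set_nms_max_accumulated_mask_size(1024 * 1024);
    if (HAILO_SUCCESS != status) { /* same failure modes as above */ }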
+ */ + hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size); + bool is_aborted(); diff --git a/hailort/libhailort/src/CMakeLists.txt b/hailort/libhailort/src/CMakeLists.txt index 51b0c90b..b9921de5 100644 --- a/hailort/libhailort/src/CMakeLists.txt +++ b/hailort/libhailort/src/CMakeLists.txt @@ -7,6 +7,7 @@ include(CMakePackageConfigHelpers) include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/common_compiler_options.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/readerwriterqueue.cmake) +include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/eigen.cmake) FUNCTION(relative_to_absolute_paths output) SET(listVar "") @@ -53,8 +54,10 @@ relative_to_absolute_paths(C_OS_SOURCES ${C_OS_SOURCES}) relative_to_absolute_paths(COMMON_C_SOURCES ${COMMON_C_SOURCES}) relative_to_absolute_paths(HAILO_OS_DIR ${HAILO_OS_DIR}) relative_to_absolute_paths(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR}) +relative_to_absolute_paths(HAILO_DRIVER_SRC_FILES ${HAILO_DRIVER_SRC_FILES}) set(HAILO_OS_DIR ${HAILO_OS_DIR} CACHE INTERNAL "Absolute path of os-dir") set(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR} CACHE INTERNAL "Absolute Full path of os-dir") +set(HAILO_DRIVER_SRC_FILES ${HAILO_DRIVER_SRC_FILES} CACHE INTERNAL "Absolute Full path of driver src files") set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} CACHE INTERNAL "Absolute paths of hailort's cpp source files") set(COMMON_C_SOURCES ${COMMON_C_SOURCES} CACHE INTERNAL "Absolute paths of common source files") set(HAILORT_SRCS_ABS ${HAILORT_CPP_SOURCES} ${HAILORT_COMMON_CPP_SOURCES} ${COMMON_C_SOURCES} CACHE INTERNAL "All absolute paths of hailort's source files") @@ -87,6 +90,7 @@ target_link_libraries(libhailort PRIVATE profiler_proto) target_link_libraries(libhailort PRIVATE scheduler_mon_proto) target_link_libraries(libhailort PRIVATE spdlog::spdlog) target_link_libraries(libhailort PRIVATE readerwriterqueue) +target_link_libraries(libhailort PRIVATE Eigen3::Eigen) if(HAILO_BUILD_SERVICE) target_link_libraries(libhailort PRIVATE grpc++_unsecure) target_link_libraries(libhailort PRIVATE hailort_rpc_grpc_proto) @@ -101,7 +105,6 @@ set(HAILORT_PUBLIC_HEADERS ${HAILORT_INC_DIR}/hailo/platform.h ${HAILORT_INC_DIR}/hailo/hailort.hpp - ${HAILORT_INC_DIR}/hailo/buffer_storage.hpp ${HAILORT_INC_DIR}/hailo/buffer.hpp ${HAILORT_INC_DIR}/hailo/device.hpp ${HAILORT_INC_DIR}/hailo/event.hpp @@ -119,6 +122,7 @@ set(HAILORT_PUBLIC_HEADERS ${HAILORT_INC_DIR}/hailo/vdevice.hpp ${HAILORT_INC_DIR}/hailo/quantization.hpp ${HAILORT_INC_DIR}/hailo/hailort_defaults.hpp + ${HAILORT_INC_DIR}/hailo/dma_mapped_buffer.hpp ) set_target_properties(libhailort PROPERTIES diff --git a/hailort/libhailort/src/core_op/CMakeLists.txt b/hailort/libhailort/src/core_op/CMakeLists.txt index 79c32184..b2d401d6 100644 --- a/hailort/libhailort/src/core_op/CMakeLists.txt +++ b/hailort/libhailort/src/core_op/CMakeLists.txt @@ -8,8 +8,11 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/config_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/intermediate_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/channel_allocator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/context_switch_buffer_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/periph_calculator.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/internal_buffer_manager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/internal_buffer_planner.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/core_op/core_op.cpp b/hailort/libhailort/src/core_op/core_op.cpp index 83fcc4f5..d195d041 100644 --- a/hailort/libhailort/src/core_op/core_op.cpp +++ b/hailort/libhailort/src/core_op/core_op.cpp @@ -16,7 +16,6 @@ #include "core_op/core_op.hpp" #include "core_op/resource_manager/resource_manager.hpp" -#include "hef/hef_internal.hpp" #include "eth/eth_stream.hpp" #include "vdma/vdma_stream.hpp" #include "mipi/mipi_stream.hpp" @@ -139,11 +138,12 @@ hailo_status CoreOp::activate(uint16_t dynamic_batch_size) } m_active_core_op_holder.clear(); } - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return status; } CHECK_SUCCESS(status); + // TODO: HRT-13019 - Unite with the calculation in vdma_config_core_op.cpp const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( std::chrono::steady_clock::now() - start_time).count(); @@ -185,6 +185,7 @@ hailo_status CoreOp::deactivate() LOGGER__ERROR("Failed deactivating core-op (status {})", deactivate_status); } + // TODO: HRT-13019 - Unite with the calculation in vdma_config_core_op.cpp const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( std::chrono::steady_clock::now() - start_time).count(); LOGGER__INFO("Deactivating took {} ms", elapsed_time_ms); @@ -289,7 +290,7 @@ hailo_status CoreOp::activate_low_level_streams() { for (auto &name_pair : m_input_streams) { auto status = name_pair.second->activate_stream(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Stream {} activation failed because it was aborted by user", name_pair.first); return status; } @@ -297,7 +298,7 @@ } for (auto &name_pair : m_output_streams) { auto status = name_pair.second->activate_stream(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Stream {} activation failed because it was aborted by user", name_pair.first); return status; } @@ -532,8 +533,8 @@ hailo_status CoreOp::infer_async_impl(std::unordered_mapsecond.get_total_transfer_size(), input.second->get_frame_size()); - auto status = input.second->write_async(std::move(transfer->second)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + auto status = input.second->write_async(TransferRequest{transfer->second}); + if (HAILO_STREAM_ABORT == status) { return status; } CHECK_SUCCESS(status); @@ -548,8 +549,8 @@ hailo_status CoreOp::infer_async_impl(std::unordered_mapsecond.get_total_transfer_size(), output.second->get_frame_size()); - auto status = output.second->read_async(std::move(transfer->second)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + auto status = output.second->read_async(TransferRequest{transfer->second}); + if (HAILO_STREAM_ABORT == status) { return status; } CHECK_SUCCESS(status); @@ -563,8 +564,13 @@ TransferDoneCallback CoreOp::wrap_user_callback(TransferDoneCallback &&original_ std::shared_ptr state, TransferDoneCallback infer_callback) { - return [original_callback, state, infer_callback](hailo_status status) { - original_callback(status); + return [original_callback, state, infer_callback](hailo_status status) mutable { + { + // Before calling infer_callback, we must ensure all stream callbacks were called and released (since the + // user may capture some variables
in the callbacks). + auto moved_callback = std::move(original_callback); + moved_callback(status); + } if (HAILO_SUCCESS != status) { state->status = status; @@ -638,7 +644,7 @@ Expected> CoreOp::create_output_stream_from_co const auto max_queue_size = batch_size * MAX_ACTIVE_TRANSFERS_SCALE; auto nms_stream = NmsOutputStream::create(base_stream, layer_info.value(), max_queue_size, - m_core_op_activated_event); + m_core_op_activated_event, stream_params.stream_interface); CHECK_EXPECTED(nms_stream); output_stream = nms_stream.release(); } diff --git a/hailort/libhailort/src/core_op/core_op.hpp b/hailort/libhailort/src/core_op/core_op.hpp index 6672c4b3..17f350e9 100644 --- a/hailort/libhailort/src/core_op/core_op.hpp +++ b/hailort/libhailort/src/core_op/core_op.hpp @@ -20,10 +20,10 @@ #define _HAILO_CORE_OP_HPP_ #include "hailo/network_group.hpp" +#include "hailo/device.hpp" #include "common/latency_meter.hpp" -#include "hef/hef_internal.hpp" #include "hef/core_op_metadata.hpp" #include "control_protocol.h" #include "core_op/active_core_op_holder.hpp" @@ -80,7 +80,7 @@ class CoreOp hailo_status activate(uint16_t dynamic_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE); hailo_status deactivate(); - // Shutdown the core-op, make sure all ongoing transfers are completed with status HAILO_STREAM_ABORTED_BY_USER + // Shutdown the core-op, make sure all ongoing transfers are completed with status HAILO_STREAM_ABORT virtual hailo_status shutdown() = 0; virtual hailo_status activate_impl(uint16_t dynamic_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE) = 0; @@ -155,7 +155,7 @@ class CoreOp // Launch write_async/read_async on all streams with wrapped callback. // We remove all transfer that was launched successfully from transfers in order to call those callback - // with HAILO_STREAM_ABORTED_BY_USER status on the case of a failure. + // with HAILO_STREAM_ABORT status on the case of a failure. hailo_status infer_async_impl(std::unordered_map &transfers, std::shared_ptr state, TransferDoneCallback done_callback); diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp new file mode 100644 index 00000000..03b360c4 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file action_list_buffer_builder.hpp + * @brief Pure virtual class that represents the basic functions and members for building the action list for the FW. 
+ * Implemented and derived by two different classes: + * ControlActionListBufferBuilder - uses control messages to send Action list to FW + * DDRActionListBufferBuilder (only relevant in hailo1x) - Action list is written to M4 mapped memory in DDR - and read + * from there directly by FW + **/ +#ifndef _HAILO_ACTION_LIST_BUFFER_BUILDER_HPP_ +#define _HAILO_ACTION_LIST_BUFFER_BUILDER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "hailo/buffer.hpp" + +#include + +#include "control_protocol.h" + +namespace hailort +{ + +class ActionListBufferBuilder { +public: + enum class Type { + CONTROL, + DDR + }; + + virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_new_context, bool last_action_buffer_in_context) = 0; + + virtual uint64_t get_mapped_buffer_dma_address() const = 0; + + ActionListBufferBuilder::Type get_builder_type() const { + return m_builder_type; + } +protected: + ActionListBufferBuilder(ActionListBufferBuilder::Type builder_type) : + m_builder_type(builder_type) + {} + virtual ~ActionListBufferBuilder() = default; +private: + const ActionListBufferBuilder::Type m_builder_type; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_ACTION_LIST_BUFFER_BUILDER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp new file mode 100644 index 00000000..92ccf303 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file control_action_list_buffer_builder.cpp + * @brief Class used to build the vector of controls containing the action list content sent to the firmware. 
+ **/ + +#include "control_action_list_buffer_builder.hpp" + +namespace hailort +{ + +ControlActionListBufferBuilder::ControlActionListBufferBuilder() : + ActionListBufferBuilder(ActionListBufferBuilder::Type::CONTROL) +{} + +Expected<std::shared_ptr<ControlActionListBufferBuilder>> ControlActionListBufferBuilder::create() +{ + return make_shared_nothrow<ControlActionListBufferBuilder>(); +} + +hailo_status ControlActionListBufferBuilder::write_action(MemoryView action, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context, bool last_action_buffer_in_context) +{ + (void) last_action_buffer_in_context; + assert(action.size() < std::numeric_limits<uint32_t>::max()); + const uint32_t action_size = static_cast<uint32_t>(action.size()); + const auto should_start_new_control = (is_new_context || !has_space_for_action(action_size)); + + if (should_start_new_control) { + start_new_control(context_type, is_new_context); + } + + auto &control = current_control(); + memcpy(&control.context_network_data[control.context_network_data_length], action.data(), action_size); + control.context_network_data_length += action_size; + return HAILO_SUCCESS; +} + +CONTROL_PROTOCOL__context_switch_context_info_chunk_t &ControlActionListBufferBuilder::current_control() +{ + assert(!m_controls.empty()); + return m_controls.back(); +} + +bool ControlActionListBufferBuilder::has_space_for_action(uint32_t action_size) +{ + auto &control = current_control(); + return (control.context_network_data_length + action_size) <= CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE; +} + +void ControlActionListBufferBuilder::start_new_control(CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_new_context) +{ + if (!is_new_context) { + current_control().is_last_chunk_per_context = false; + } + + // Creating a new control directly inside the vector to avoid copying the control struct. + m_controls.emplace_back(); + auto &new_control = current_control(); + new_control.context_network_data_length = 0; + new_control.context_type = static_cast<uint8_t>(context_type); + new_control.is_first_chunk_per_context = is_new_context; + new_control.is_last_chunk_per_context = true; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp new file mode 100644 index 00000000..d417df58 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp @@ -0,0 +1,55 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file control_action_list_buffer_builder.hpp + * @brief Class used to build the vector of controls containing the action list content sent to the firmware. + **/ + +#ifndef _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_ +#define _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_ + +#include "hailo/hailort.h" + +#include "context_switch_defs.h" +#include "core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp" + +#include "vdma/channel/channel_id.hpp" +#include "device_common/control_protocol.hpp" +#include "hef/layer_info.hpp" + + +namespace hailort +{ +
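How a caller might choose between the two ActionListBufferBuilder implementations; the selection logic below is an assumption (mirroring the DDR_ACTION_LIST_ENV_VAR opt-in defined later in this diff), not the actual resource-manager code, and `num_contexts` and `driver` are stand-ins:

    std::shared_ptr<ActionListBufferBuilder> builder;
    if (nullptr != std::getenv("HAILO_DDR_ACTION_LIST")) {
        auto ddr_builder = DDRActionListBufferBuilder::create(num_contexts, driver);
        if (ddr_builder) {
            // FW will read the action list directly from the M4-mapped DDR buffer.
            builder = ddr_builder.release();
        }
    }
    if (nullptr == builder) {
        // Default path - the action list is chunked into control messages.
        auto control_builder = ControlActionListBufferBuilder::create();
        if (control_builder) {
            builder = control_builder.release();
        }
    }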
+// This class manages a vector of CONTROL_PROTOCOL__context_switch_context_info_chunk_t controls to be sent +// to the firmware. Actions are written to the control buffer until we reach the maximum control size, and then we +// start a new control. +class ControlActionListBufferBuilder : public ActionListBufferBuilder { +public: + ControlActionListBufferBuilder(); + static Expected<std::shared_ptr<ControlActionListBufferBuilder>> create(); + virtual ~ControlActionListBufferBuilder() = default; + + virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_new_context, bool last_action_buffer_in_context) override; + + virtual uint64_t get_mapped_buffer_dma_address() const override { + return CONTEXT_SWITCH_DEFS__INVALID_DDR_CONTEXTS_BUFFER_ADDRESS; + } + + const std::vector<CONTROL_PROTOCOL__context_switch_context_info_chunk_t> &get_controls() const { + return m_controls; + } +private: + CONTROL_PROTOCOL__context_switch_context_info_chunk_t &current_control(); + bool has_space_for_action(uint32_t action_size); + void start_new_control(CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context); + + std::vector<CONTROL_PROTOCOL__context_switch_context_info_chunk_t> m_controls; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp new file mode 100644 index 00000000..ee9b179a --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file ddr_action_list_buffer_builder.cpp + * @brief Class used to build the action list sent to the firmware through DDR. + **/ + +#include "ddr_action_list_buffer_builder.hpp" + +namespace hailort +{ + +// TODO: HRT-12512 : Can remove these variables when / if continuous buffer comes from designated region +// In hailo15 - the DDR memory range of 0x80000000 - 0x90000000 is mapped to the M4 using a LUT (look up table) to addresses +// 0x50000000 - 0x60000000, Currently this is the range the CMA allocation should come from seeing as this is one of the first CMA allocations +// and the linux cma memory pool according to the hailo15 dtsi is - "alloc-ranges = <0 0x80000000 0 0x40000000>" +// (meaning starts from 0x80000000 and goes for 992 MB) - so anything allocated from 0x90000000 and onward will be outside the mapped area +// The solution to this issue is to create a specific range for this allocation inside the mapped area - seeing as this affects other components +// Like the dsp etc... need to check with them before doing so. For now - this should almost always return an address in the mapped area and we will verify + // to double check
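Back-of-the-envelope for the TODO comment above, using only the figures it quotes: the M4 LUT window covers 256 MiB, while the CMA pool spans 0x40000000 bytes from the same base address, so only allocations landing in the first quarter of the pool are M4-visible - hence the verify_dma_addr() check below:

    constexpr uint64_t M4_WINDOW_SIZE = 0x90000000ULL - 0x80000000ULL; // 0x10000000 = 256 MiB
    constexpr uint64_t CMA_POOL_SIZE  = 0x40000000ULL;                 // per the dtsi alloc-ranges
    static_assert(M4_WINDOW_SIZE < CMA_POOL_SIZE,
        "CMA allocations can land outside the M4-visible window, so the address must be verified");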
+DDRActionListBufferBuilder::DDRActionListBufferBuilder(vdma::ContinuousBuffer &&buffer) : + ActionListBufferBuilder(ActionListBufferBuilder::Type::DDR), + m_action_list_buffer(std::move(buffer)), + m_write_offset(0), + m_current_context_info{} +{} + +bool DDRActionListBufferBuilder::verify_dma_addr(vdma::ContinuousBuffer &buffer) +{ + // Verify that the buffer starts and ends inside the mapped range + if (buffer.dma_address() < CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS || + (buffer.dma_address() + buffer.size() >= CONTEXT_SWITCH_DEFS__END_M4_MAPPED_DDR_ADDRESS)) { + return false; + } + return true; +} + +Expected<std::shared_ptr<DDRActionListBufferBuilder>> DDRActionListBufferBuilder::create(size_t num_contexts, + HailoRTDriver &driver) +{ + // Try to allocate a continuous buffer for the action list in DDR + auto continous_alloc = vdma::ContinuousBuffer::create(num_contexts * + sizeof(CONTROL_PROTOCOL__context_switch_context_info_chunk_t), driver); + + // TODO HRT-12512 - Add fallback to Control if continuous buffer allocation fails + CHECK_EXPECTED(continous_alloc); + // Verify that the continuous buffer is in the allocated region + CHECK_AS_EXPECTED(verify_dma_addr(continous_alloc.value()), HAILO_INTERNAL_FAILURE, + "Failed to allocate continuous buffer in M4 mapped memory region"); + return make_shared_nothrow<DDRActionListBufferBuilder>(continous_alloc.release()); +} + +hailo_status DDRActionListBufferBuilder::write_action(MemoryView action, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context, bool is_last_action_in_context) +{ + assert(action.size() < std::numeric_limits<uint32_t>::max()); + const uint32_t action_size = static_cast<uint32_t>(action.size()); + + if (is_new_context) { + m_current_context_info.is_first_chunk_per_context = true; + m_current_context_info.is_last_chunk_per_context = true; + m_current_context_info.context_type = static_cast<uint8_t>(context_type); + m_current_context_info.context_network_data_length = 0; + } + + CHECK(m_current_context_info.context_network_data_length + action_size <= + ARRAY_ENTRIES(m_current_context_info.context_network_data), HAILO_INVALID_ARGUMENT, + "Context exceeds maximum context size {}", ARRAY_ENTRIES(m_current_context_info.context_network_data)); + + // TODO HRT-12788 - make more efficient by writing directly to DDR without using the local context_info_single_control_t + memcpy(&(m_current_context_info.context_network_data[m_current_context_info.context_network_data_length]), + action.data(), action_size); + m_current_context_info.context_network_data_length += action_size; + + if (is_last_action_in_context) { + const auto write_size = sizeof(CONTROL_PROTOCOL__context_switch_context_info_chunk_t); + auto status = m_action_list_buffer.write(&m_current_context_info, write_size, m_write_offset); + CHECK_SUCCESS(status); + m_write_offset += write_size; + } + + return HAILO_SUCCESS; +} + +uint64_t DDRActionListBufferBuilder::get_mapped_buffer_dma_address() const +{ + return m_action_list_buffer.dma_address(); +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp new file mode 100644 index 00000000..05b4b214 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2024 Hailo
Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file ddr_action_list_buffer_builder.hpp + * @brief Class used to build the action list sent to the firmware through DDR. + **/ +#ifndef _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_ +#define _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_ + +#include "hailo/hailort.h" +#include "context_switch_defs.h" +#include "core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp" +#include "vdma/memory/continuous_buffer.hpp" + +#define DDR_ACTION_LIST_ENV_VAR ("HAILO_DDR_ACTION_LIST") +#define DDR_ACTION_LIST_ENV_VAR_VALUE ("1") + +namespace hailort +{ + +class DDRActionListBufferBuilder : public ActionListBufferBuilder { +public: + DDRActionListBufferBuilder(vdma::ContinuousBuffer &&buffer); + virtual ~DDRActionListBufferBuilder() = default; + static Expected> create(size_t num_contexts, HailoRTDriver &driver); + + virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_new_context, bool last_action_buffer_in_context) override; + + virtual uint64_t get_mapped_buffer_dma_address() const override; +private: + vdma::ContinuousBuffer m_action_list_buffer; + // TODO: HRT-12512 : Can remove this check when / if continuous buffer comes from designated region + static bool verify_dma_addr(vdma::ContinuousBuffer &buffer); + size_t m_write_offset; + CONTROL_PROTOCOL__context_switch_context_info_chunk_t m_current_context_info; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp index 12385b51..ecc323df 100644 --- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp @@ -9,8 +9,8 @@ */ #include "core_op/resource_manager/config_buffer.hpp" -#include "vdma/memory/sg_buffer.hpp" -#include "vdma/memory/continuous_buffer.hpp" +#include "vdma/memory/sg_edge_layer.hpp" +#include "vdma/memory/continuous_edge_layer.hpp" #include "vdma/memory/buffer_requirements.hpp" #include @@ -18,7 +18,7 @@ namespace hailort { -Expected> ConfigBuffer::create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, +Expected> ConfigBuffer::create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes, const uint32_t buffer_size) { auto buffer_ptr = should_use_ccb(driver) ? 
@@ -43,7 +43,7 @@ Expected ConfigBuffer::create(HailoRTDriver &driver, vdma::Channel return ConfigBuffer(buffer_ptr.release(), channel_id, buffer_size); } -ConfigBuffer::ConfigBuffer(std::unique_ptr &&buffer, +ConfigBuffer::ConfigBuffer(std::unique_ptr &&buffer, vdma::ChannelId channel_id, size_t total_buffer_size) : m_buffer(std::move(buffer)), m_channel_id(channel_id), @@ -55,7 +55,7 @@ Expected ConfigBuffer::program_descriptors() { // TODO HRT-9657: remove DEVICE interrupts auto descriptors_count = - m_buffer->program_descriptors(m_acc_buffer_offset, vdma::InterruptsDomain::DEVICE, m_acc_desc_count); + m_buffer->program_descriptors(m_acc_buffer_offset, InterruptsDomain::DEVICE, m_acc_desc_count); CHECK_EXPECTED(descriptors_count); m_acc_desc_count += descriptors_count.value(); @@ -71,7 +71,7 @@ hailo_status ConfigBuffer::pad_with_nops() auto page_size = desc_page_size(); auto buffer_size = m_total_buffer_size; auto buffer_residue = buffer_size % page_size; - if (0 != buffer_residue % CCW_HEADER_SIZE) { + if (0 != (page_size - buffer_residue) % CCW_HEADER_SIZE) { LOGGER__ERROR("CFG channel buffer size must be a multiple of CCW header size ({})", CCW_HEADER_SIZE); return HAILO_INTERNAL_FAILURE; } @@ -135,40 +135,56 @@ hailo_status ConfigBuffer::write_inner(const MemoryView &data) return HAILO_SUCCESS; } -Expected> ConfigBuffer::create_sg_buffer(HailoRTDriver &driver, +Expected> ConfigBuffer::create_sg_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes) { - static const bool NOT_CIRCULAR = false; + static const auto NOT_CIRCULAR = false; // For config channels (In Hailo15), the page size must be a multiplication of host default page size. // Therefore we use the flag force_default_page_size for those types of buffers. 
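The pad_with_nops() fix above changes which quantity must be CCW-aligned: the padding added to reach a page boundary, rather than the residue itself. A worked example with hypothetical sizes:

    // Suppose page_size = 512 and m_total_buffer_size = 1000:
    //   buffer_residue = 1000 % 512 = 488
    //   padding        = 512 - 488  = 24
    // The old check tested buffer_residue % CCW_HEADER_SIZE, but it is the 24
    // padding bytes that get filled with NOP CCWs, so the new check tests
    // (page_size - buffer_residue) % CCW_HEADER_SIZE instead.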
- auto const FORCE_DEFAULT_PAGE_SIZE = true; - auto const FORCE_BATCH_SIZE = true; - auto buffer_size_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers( - driver.desc_max_page_size(), 1, cfg_sizes, NOT_CIRCULAR, FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE); + static const auto FORCE_DEFAULT_PAGE_SIZE = true; + static const auto FORCE_BATCH_SIZE = true; + auto buffer_size_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_multiple_transfers( + vdma::VdmaBuffer::Type::SCATTER_GATHER, driver.desc_max_page_size(), 1, cfg_sizes, NOT_CIRCULAR, + FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE); CHECK_EXPECTED(buffer_size_requirements); const auto page_size = buffer_size_requirements->desc_page_size(); const auto descs_count = buffer_size_requirements->descs_count(); const auto buffer_size = buffer_size_requirements->buffer_size(); - auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, page_size, NOT_CIRCULAR, - HailoRTDriver::DmaDirection::H2D, channel_id); + auto buffer = vdma::SgBuffer::create(driver, buffer_size, HailoRTDriver::DmaDirection::H2D); CHECK_EXPECTED(buffer); - auto buffer_ptr = make_unique_nothrow(buffer.release()); + auto buffer_ptr = make_shared_nothrow(buffer.release()); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - return std::unique_ptr(std::move(buffer_ptr)); + static const auto DEFAULT_OFFSET = 0; + auto edge_layer = vdma::SgEdgeLayer::create(std::move(buffer_ptr), buffer_size, DEFAULT_OFFSET, driver, descs_count, + page_size, NOT_CIRCULAR, channel_id); + CHECK_EXPECTED(edge_layer); + + auto edge_layer_ptr = make_unique_nothrow(edge_layer.release()); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::unique_ptr(std::move(edge_layer_ptr)); } -Expected> ConfigBuffer::create_ccb_buffer(HailoRTDriver &driver, +Expected> ConfigBuffer::create_ccb_buffer(HailoRTDriver &driver, uint32_t buffer_size) { - static const bool NOT_CIRCULAR = false; - static const uint16_t SINGLE_TRANSFER = 1; - auto buffer_size_requirements = vdma::BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer( - SINGLE_TRANSFER, buffer_size, NOT_CIRCULAR); + static const auto NOT_CIRCULAR = false; + // For config channels (In Hailo15), the page size must be a multiplication of host default page size. + // Therefore we use the flag force_default_page_size for those types of buffers. + static const auto FORCE_DEFAULT_PAGE_SIZE = true; + static const auto FORCE_BATCH_SIZE = true; + static const auto DEFAULT_BATCH_SIZE = 1; + static const auto IS_VDMA_ALIGNED_BUFFER = true; + auto buffer_size_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::CONTINUOUS, driver.desc_max_page_size(), DEFAULT_BATCH_SIZE, DEFAULT_BATCH_SIZE, + buffer_size, NOT_CIRCULAR, FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); CHECK_EXPECTED(buffer_size_requirements); + const auto page_size = buffer_size_requirements->desc_page_size(); + const auto descs_count = buffer_size_requirements->descs_count(); auto buffer = vdma::ContinuousBuffer::create(buffer_size_requirements->buffer_size(), driver); /* Don't print error here since this might be expected error that the libhailoRT can recover from (out of host memory). 
If it's not the case, there is a print in hailort_driver.cpp file */ @@ -178,10 +194,17 @@ Expected> ConfigBuffer::create_ccb_buffer(Hail CHECK_EXPECTED(buffer); } - auto buffer_ptr = make_unique_nothrow(buffer.release()); + auto buffer_ptr = make_shared_nothrow(buffer.release()); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - return std::unique_ptr(std::move(buffer_ptr)); + static const auto DEFAULT_OFFSET = 0; + auto edge_layer = vdma::ContinuousEdgeLayer::create(std::move(buffer_ptr), buffer_size, DEFAULT_OFFSET, page_size, descs_count); + CHECK_EXPECTED(edge_layer); + + auto edge_layer_ptr = make_unique_nothrow(edge_layer.release()); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::unique_ptr(std::move(edge_layer_ptr)); } bool ConfigBuffer::should_use_ccb(HailoRTDriver &driver) diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp index 534bab52..45695216 100644 --- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp @@ -13,7 +13,7 @@ #include "hailo/buffer.hpp" -#include "vdma/memory/vdma_buffer.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" namespace hailort { @@ -48,20 +48,20 @@ class ConfigBuffer final CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const; private: - ConfigBuffer(std::unique_ptr &&buffer, vdma::ChannelId channel_id, size_t total_buffer_size); + ConfigBuffer(std::unique_ptr &&buffer, vdma::ChannelId channel_id, size_t total_buffer_size); hailo_status write_inner(const MemoryView &data); - static Expected> create_sg_buffer(HailoRTDriver &driver, + static Expected> create_sg_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes); - static Expected> create_ccb_buffer(HailoRTDriver &driver, + static Expected> create_ccb_buffer(HailoRTDriver &driver, uint32_t buffer_size); - static Expected> create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, + static Expected> create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes, const uint32_t buffer_size); static bool should_use_ccb(HailoRTDriver &driver); - std::unique_ptr m_buffer; + std::unique_ptr m_buffer; vdma::ChannelId m_channel_id; const size_t m_total_buffer_size; size_t m_acc_buffer_offset; diff --git a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp deleted file mode 100644 index 5684abc6..00000000 --- a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file context_switch_buffer_builder.cpp - * @brief Class used to build the context switch buffer sent to the firmware - **/ - -#include "context_switch_buffer_builder.hpp" - -namespace hailort -{ - -ContextSwitchBufferBuilder::ContextSwitchBufferBuilder(CONTROL_PROTOCOL__context_switch_context_type_t context_type) : - m_context_type(context_type) -{ - // Initialize first control - start_new_control(); -} - -void ContextSwitchBufferBuilder::write_action(MemoryView action) -{ - assert(action.size() < std::numeric_limits::max()); - const uint32_t action_size = static_cast(action.size()); - - if (!has_space_for_action(action_size)) { - // Size exceeded single control size, creating a new control buffer. - start_new_control(); - } - - auto &control = current_control(); - memcpy(&control.context_network_data[control.context_network_data_length], action.data(), action_size); - control.context_network_data_length += action_size; -} - -const std::vector &ContextSwitchBufferBuilder::get_controls() const -{ - return m_controls; -} - -const CONTROL_PROTOCOL__context_switch_context_type_t &ContextSwitchBufferBuilder::get_context_type() const -{ - return m_context_type; -} - -CONTROL_PROTOCOL__context_switch_context_info_single_control_t &ContextSwitchBufferBuilder::current_control() -{ - assert(!m_controls.empty()); - return m_controls.back(); -} - -bool ContextSwitchBufferBuilder::has_space_for_action(uint32_t action_size) -{ - auto &control = current_control(); - return (control.context_network_data_length + action_size) <= ARRAY_ENTRIES(control.context_network_data); -} - -void ContextSwitchBufferBuilder::start_new_control() -{ - if (!m_controls.empty()) { - current_control().is_last_control_per_context = false; - } - - // Creating a new control directly inside the vector to avoid copying the control struct. - m_controls.emplace_back(); - auto &new_control = current_control(); - new_control.context_network_data_length = 0; - new_control.context_type = static_cast(m_context_type); - new_control.is_first_control_per_context = (1 == m_controls.size()); - new_control.is_last_control_per_context = true; -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.hpp deleted file mode 100644 index a121761c..00000000 --- a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.hpp +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file context_switch_buffer_builder.hpp - * @brief Class used to build the context switch buffer sent to the firmware. - **/ - -#ifndef _HAILO_CONTEXT_SWITCH_BUFFER_BUILDER_HPP_ -#define _HAILO_CONTEXT_SWITCH_BUFFER_BUILDER_HPP_ - -#include "hailo/hailort.h" - -#include "vdma/channel/channel_id.hpp" -#include "device_common/control_protocol.hpp" -#include "hef/layer_info.hpp" - - -namespace hailort -{ - -// This class manages a vector of CONTROL_PROTOCOL__context_switch_context_info_single_control_t controls to be sent -// to the firmware. Actions are written to the control buffer, until we reach the maximum control size, then we will -// start a new control. 
-class ContextSwitchBufferBuilder final { -public: - ContextSwitchBufferBuilder(CONTROL_PROTOCOL__context_switch_context_type_t context_type); - - void write_action(MemoryView action); - const std::vector &get_controls() const; - const CONTROL_PROTOCOL__context_switch_context_type_t &get_context_type() const; - -private: - CONTROL_PROTOCOL__context_switch_context_info_single_control_t ¤t_control(); - bool has_space_for_action(uint32_t action_size); - void start_new_control(); - - CONTROL_PROTOCOL__context_switch_context_type_t m_context_type; - std::vector m_controls; -}; - -} /* namespace hailort */ - -#endif /* _HAILO_CONTEXT_SWITCH_BUFFER_BUILDER_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp index 59be948b..77b2e801 100644 --- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp @@ -10,35 +10,33 @@ #include "intermediate_buffer.hpp" #include "core_op/resource_manager/resource_manager.hpp" -#include "vdma/memory/sg_buffer.hpp" -#include "vdma/memory/continuous_buffer.hpp" +#include "vdma/memory/sg_edge_layer.hpp" +#include "vdma/memory/continuous_edge_layer.hpp" #include "vdma/memory/buffer_requirements.hpp" namespace hailort { -Expected> IntermediateBuffer::create_buffer(HailoRTDriver &driver, uint32_t transfer_size, +Expected> IntermediateBuffer::create_edge_layer( + std::shared_ptr &&buffer, size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type) { const bool is_circular = (streaming_type == StreamingType::CIRCULAR_CONTINUOS); - auto buffer_exp = should_use_ccb(driver, streaming_type) ? - create_ccb_buffer(driver, transfer_size, max_batch_size, is_circular) : - create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, is_circular); + auto buffer_exp = (vdma::VdmaBuffer::Type::CONTINUOUS == buffer->type()) ? + create_ccb_edge_layer(std::move(buffer), buffer_offset, driver, transfer_size, max_batch_size, is_circular) : + create_sg_edge_layer(std::move(buffer), buffer_offset, driver, transfer_size, max_batch_size, d2h_channel_id, is_circular); - if (should_use_ccb(driver, streaming_type) && (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer_exp.status())) { - /* Try to use sg buffer instead */ - return create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, is_circular); - } else { - return buffer_exp; - } + return buffer_exp; } Expected IntermediateBuffer::create(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type) + uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, + std::shared_ptr &&buffer, size_t buffer_offset) { - auto buffer_exp = create_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, streaming_type); - CHECK_EXPECTED(buffer_exp); - auto buffer_ptr = buffer_exp.release(); + auto edge_layer_exp = create_edge_layer(std::move(buffer), buffer_offset, driver, transfer_size, max_batch_size, + d2h_channel_id, streaming_type); + CHECK_EXPECTED(edge_layer_exp); + auto edge_layer_ptr = edge_layer_exp.release(); if (streaming_type == StreamingType::BURST) { // We have max_batch_size transfers, so we program them one by one. 
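The reworked IntermediateBuffer::create() above now receives its backing buffer from the outside instead of allocating one itself. A sketch of a call site, where `get_internal_buffer()` is a hypothetical helper and the size parameters are assumptions:

    std::shared_ptr<vdma::VdmaBuffer> base_buffer = get_internal_buffer(); // allocated and owned elsewhere
    auto intermediate = IntermediateBuffer::create(driver, transfer_size, max_batch_size,
        d2h_channel_id, IntermediateBuffer::StreamingType::BURST,
        std::move(base_buffer), /*buffer_offset=*/0);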
The last transfer should report interrupt @@ -46,31 +44,31 @@ Expected IntermediateBuffer::create(HailoRTDriver &driver, u size_t acc_offset = 0; for (uint16_t i = 0; i < max_batch_size; i++) { const auto last_desc_interrupts_domain = ((max_batch_size - 1) == i) ? - vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE; - auto desc_count_local = buffer_ptr->program_descriptors(transfer_size, last_desc_interrupts_domain, acc_offset); + InterruptsDomain::DEVICE : InterruptsDomain::NONE; + auto desc_count_local = edge_layer_ptr->program_descriptors(transfer_size, last_desc_interrupts_domain, acc_offset); CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. Given max_batch_size is too big."); acc_offset += desc_count_local.value(); } } else { // Program all descriptors, no need for interrupt. - const auto interrupts_domain = vdma::InterruptsDomain::NONE; - const auto total_size = buffer_ptr->descs_count() * buffer_ptr->desc_page_size(); - auto desc_count_local = buffer_ptr->program_descriptors(total_size, interrupts_domain, 0); + const auto interrupts_domain = InterruptsDomain::NONE; + const auto total_size = edge_layer_ptr->descs_count() * edge_layer_ptr->desc_page_size(); + auto desc_count_local = edge_layer_ptr->program_descriptors(total_size, interrupts_domain, 0); CHECK_EXPECTED(desc_count_local); } - return IntermediateBuffer(std::move(buffer_ptr), transfer_size, max_batch_size); + return IntermediateBuffer(std::move(edge_layer_ptr), transfer_size, max_batch_size); } Expected IntermediateBuffer::read() { const auto size = m_transfer_size * m_dynamic_batch_size; - assert(size <= m_buffer->size()); + assert(size <= m_edge_layer->size()); auto res = Buffer::create(size); CHECK_EXPECTED(res); - auto status = m_buffer->read(res->data(), size, 0); + auto status = m_edge_layer->read(res->data(), size, 0); CHECK_SUCCESS_AS_EXPECTED(status); return res.release(); @@ -78,94 +76,65 @@ Expected IntermediateBuffer::read() CONTROL_PROTOCOL__host_buffer_info_t IntermediateBuffer::get_host_buffer_info() const { - return m_buffer->get_host_buffer_info(m_transfer_size); + return m_edge_layer->get_host_buffer_info(m_transfer_size); } -IntermediateBuffer::IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, +IntermediateBuffer::IntermediateBuffer(std::unique_ptr &&edge_layer, uint32_t transfer_size, uint16_t batch_size) : - m_buffer(std::move(buffer)), + m_edge_layer(std::move(edge_layer)), m_transfer_size(transfer_size), m_dynamic_batch_size(batch_size) {} -Expected> IntermediateBuffer::create_sg_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular) +Expected> IntermediateBuffer::create_sg_edge_layer( + std::shared_ptr &&buffer, size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, + uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular) { - auto const DONT_FORCE_DEFAULT_PAGE_SIZE = false; - auto const FORCE_BATCH_SIZE = true; - auto const IS_VDMA_ALIGNED_BUFFER = true; - auto buffer_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( - driver.desc_max_page_size(), batch_size, batch_size, transfer_size, is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, - FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; + static const auto FORCE_BATCH_SIZE = true; + static const auto IS_VDMA_ALIGNED_BUFFER = true; + auto buffer_requirements = 
vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::SCATTER_GATHER, driver.desc_max_page_size(), batch_size, batch_size, transfer_size, + is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); CHECK_EXPECTED(buffer_requirements); const auto desc_page_size = buffer_requirements->desc_page_size(); const auto descs_count = buffer_requirements->descs_count(); const auto buffer_size = buffer_requirements->buffer_size(); - auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, desc_page_size, is_circular, - HailoRTDriver::DmaDirection::BOTH, d2h_channel_id); - CHECK_EXPECTED(buffer); + auto edge_layer = vdma::SgEdgeLayer::create(std::dynamic_pointer_cast(buffer), buffer_size, + buffer_offset, driver, descs_count, desc_page_size, is_circular, d2h_channel_id); + CHECK_EXPECTED(edge_layer); - auto buffer_ptr = make_unique_nothrow(buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + auto edge_layer_ptr = make_unique_nothrow(edge_layer.release()); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); - return std::unique_ptr(std::move(buffer_ptr)); + return std::unique_ptr(std::move(edge_layer_ptr)); } -Expected> IntermediateBuffer::create_ccb_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, bool is_circular) +Expected> IntermediateBuffer::create_ccb_edge_layer(std::shared_ptr &&buffer, + size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, bool is_circular) { - auto buffer_size_requirements = vdma::BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer( - batch_size, transfer_size, is_circular); - CHECK_EXPECTED(buffer_size_requirements); - - auto buffer = vdma::ContinuousBuffer::create(buffer_size_requirements->buffer_size(), driver); - /* Don't print error here since this might be expected error that the libhailoRT can recover from - (out of host memory). 
If it's not the case, there is a print in hailort_driver.cpp file */ - if (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer.status()) { - return make_unexpected(buffer.status()); - } else { - CHECK_EXPECTED(buffer); - } + static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; + static const auto FORCE_BATCH_SIZE = true; + static const auto IS_VDMA_ALIGNED_BUFFER = true; - auto buffer_ptr = make_unique_nothrow(buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + auto buffer_size_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::CONTINUOUS, driver.desc_max_page_size(), batch_size, batch_size, transfer_size, + is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + CHECK_EXPECTED(buffer_size_requirements); - return std::unique_ptr(std::move(buffer_ptr)); -} + const auto page_size = buffer_size_requirements->desc_page_size(); + const auto descs_count = buffer_size_requirements->descs_count(); + const auto buffer_size = buffer_size_requirements->buffer_size(); -bool IntermediateBuffer::should_use_ccb(HailoRTDriver &driver, StreamingType streaming_type) -{ - if (driver.dma_type() == HailoRTDriver::DmaType::PCIE) { - // CCB not supported on PCIe - return false; - } + auto edge_layer = vdma::ContinuousEdgeLayer::create(std::dynamic_pointer_cast(buffer), + buffer_size, buffer_offset, page_size, descs_count); + CHECK_EXPECTED(edge_layer); - switch (streaming_type) { - case StreamingType::BURST: - // On burst (aka inter-context), because the buffers are big (And depends on the max_batch_size), we currently - // don't want to use CCB by default. - if (nullptr != std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) { - LOGGER__WARNING("Using desc instead of CCB for inter context channels is not optimal for performance.\n"); - return false; - } else { - return true; - } - case StreamingType::CIRCULAR_CONTINUOS: - // On circular_continuous (aka ddr), the buffers are relatively small and we want to verify the C2C mechanism, - // therefore the CCB is the default behaviour. - // Due to request from the DFC group (Memory issues) - DDR buffers would run over DESC and not CCB buffers. - if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_CCB")) { - LOGGER__INFO("Using Non default buffer type (CCB instead of DESC) for ddr channel. 
\n"); - return true; - } else { - return false; - } - } + auto edge_layer_ptr = make_unique_nothrow(edge_layer.release()); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); - // Shouldn't reach here - assert(false); - return false; + return std::unique_ptr(std::move(edge_layer_ptr)); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp index aebf2ab5..8661cafa 100644 --- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp @@ -13,8 +13,8 @@ #include "hailo/expected.hpp" #include "hailo/buffer.hpp" -#include "os/hailort_driver.hpp" -#include "vdma/memory/vdma_buffer.hpp" +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" #include "control_protocol.h" @@ -34,24 +34,25 @@ class IntermediateBuffer final { }; static Expected create(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type); + uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, + std::shared_ptr &&buffer, size_t buffer_offset); Expected read(); CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const; private: - IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, uint16_t batch_size); - - static Expected> create_sg_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular); - static Expected> create_ccb_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, bool is_circular); - static Expected> create_buffer(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type); - - static bool should_use_ccb(HailoRTDriver &driver, StreamingType streaming_type); - - std::unique_ptr m_buffer; + IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, uint16_t batch_size); + + static Expected> create_sg_edge_layer(std::shared_ptr &&buffer, + size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, + vdma::ChannelId d2h_channel_id, bool is_circular); + static Expected> create_ccb_edge_layer(std::shared_ptr &&buffer, + size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, bool is_circular); + static Expected> create_edge_layer(std::shared_ptr &&buffer, + size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t max_batch_size, + vdma::ChannelId d2h_channel_id, StreamingType streaming_type); + + std::unique_ptr m_edge_layer; const uint32_t m_transfer_size; uint16_t m_dynamic_batch_size; }; diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp new file mode 100644 index 00000000..3dd26db8 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp @@ -0,0 +1,327 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file internal_buffer_manager.hpp + * @brief Planner for all the internal buffers of the CoreOp + * + * The manager will hold all the internal buffers of the CoreOp. 
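An assumed end-to-end usage of the new manager, based only on the API in this file - register each edge layer, then let the planner run (see plan_and_execute() below, which walks an ordered fallback chain of planner types); `context_edge_layers`, `default_planner_type` and `number_of_contexts` are stand-ins:

    TRY(auto buffer_manager, InternalBufferManager::create(driver, config_params));
    for (const auto &layer_info : context_edge_layers) {
        auto status = buffer_manager->add_layer_buffer_info(layer_info);
        if (HAILO_SUCCESS != status) {
            return status;
        }
    }
    auto status = buffer_manager->plan_and_execute(default_planner_type, number_of_contexts);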
+ * The manager can optimize the memory consumption of the core op and provide an API + * for querying the total internal memory consumption. + * + **/ + +#include "internal_buffer_manager.hpp" +#include "hef/layer_info.hpp" +#include "vdma/memory/sg_buffer.hpp" +#include "vdma/memory/continuous_buffer.hpp" +#include "vdma/memory/buffer_requirements.hpp" + + +#include <numeric> + +namespace hailort +{ + +// Macro that checks status. If status is HAILO_OUT_OF_HOST_CMA_MEMORY, return without printing an error to the prompt. +#define CHECK_EXPECTED_OUT_OF_CMA_MEMORY(type) if (HAILO_OUT_OF_HOST_CMA_MEMORY == (type).status()) {return make_unexpected(HAILO_OUT_OF_HOST_CMA_MEMORY);} CHECK_SUCCESS(type); + +Expected<std::shared_ptr<InternalBufferManager>> InternalBufferManager::create(HailoRTDriver &driver, + const ConfigureNetworkParams &config_params) +{ + + auto buffer_manager_ptr = make_shared_nothrow<InternalBufferManager>(InternalBufferManager(driver, config_params)); + CHECK_NOT_NULL_AS_EXPECTED(buffer_manager_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return buffer_manager_ptr; +} + +InternalBufferManager::InternalBufferManager(HailoRTDriver &driver, const ConfigureNetworkParams &config_params) + : m_driver(driver), + m_config_params(config_params), + m_edge_layer_infos(), + m_edge_layer_to_buffer_map() + {} + + +void InternalBufferManager::add_buffer_info(const EdgeLayerKey &edge_layer_key, const EdgeLayerInfo &buffer_info) +{ + m_edge_layer_infos.emplace(edge_layer_key, buffer_info); +} + +Expected<uint16_t> InternalBufferManager::get_network_batch_size(const std::string &network_name) const +{ + for (auto const &network_map : m_config_params.network_params_by_name) { + auto const network_name_from_params = network_map.first; + if (network_name_from_params == network_name) { + auto actual_batch_size = network_map.second.batch_size; + if (HAILO_DEFAULT_BATCH_SIZE == actual_batch_size) { + actual_batch_size = DEFAULT_ACTUAL_BATCH_SIZE; + } + return actual_batch_size; + } + } + + LOGGER__ERROR("Failed to find network with network name {}", network_name); + + return make_unexpected(HAILO_NOT_FOUND); +} + +hailo_status InternalBufferManager::add_inter_context_buffer(const LayerInfo &layer_info) +{ + // This API gets the inter context input Layer, but the key is the output layer. + // The reason is that there is one output edge layer and multiple input edge layers. + // We must get the info of all the inputs in order to set the right start and end contexts, + // but the key must be the output (from the connected context info). + + // layer_info.connected_context_info.context_index == start context + // layer_info.context_index == end context + const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); + TRY(auto batch_size, get_network_batch_size(layer_info.network_name)); + static const bool BUFFER_REUSE = true; + + auto edge_layer_key = + std::make_pair(layer_info.connected_context_info.context_index, layer_info.connected_context_info.stream_index); + // First check if there is a key (for the case of one output with multiple inputs). + + const auto it = m_edge_layer_infos.find(edge_layer_key); + if (it != m_edge_layer_infos.end()) { + CHECK(it->second.transfer_size == transfer_size, HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key but different transfer size"); + CHECK(it->second.max_transfers_in_batch == batch_size, HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key but different batch size"); + // Now if the new end context is bigger than the old one, update it.
+ if (it->second.end_context < layer_info.context_index) { + it->second.end_context = layer_info.context_index; + } + } else { + LOGGER__DEBUG("Adding edge layer with key ({}, {}) to the internal buffer manager", edge_layer_key.first, edge_layer_key.second); + add_buffer_info(edge_layer_key, + EdgeLayerInfo{ + layer_info.type, + transfer_size, + batch_size, + layer_info.connected_context_info.context_index, + layer_info.context_index, + BUFFER_REUSE}); + } + return HAILO_SUCCESS; +} + +hailo_status InternalBufferManager::add_ddr_buffer(const LayerInfo &layer_info) +{ + // In DDR - always use core bytes per buffer as row size + const auto row_size = static_cast(layer_info.nn_stream_config.core_bytes_per_buffer); + const auto min_buffered_rows = layer_info.ddr_info.min_buffered_rows; + static auto const BUFFER_REUSE = true; + auto edge_layer_key = std::make_pair(layer_info.context_index, layer_info.stream_index); + + auto it = m_edge_layer_infos.find(edge_layer_key); + CHECK(it == m_edge_layer_infos.end(), HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key for DDR layer. This is not supported."); + + add_buffer_info(edge_layer_key, + EdgeLayerInfo{ + layer_info.type, + row_size, + min_buffered_rows, + layer_info.context_index, + layer_info.connected_context_info.context_index, + BUFFER_REUSE}); + + return HAILO_SUCCESS; +} + +// For edge layers +hailo_status InternalBufferManager::add_layer_buffer_info(const LayerInfo &layer_info) +{ + switch (layer_info.type) { + case LayerType::INTER_CONTEXT: + return add_inter_context_buffer(layer_info); + case LayerType::DDR: + return add_ddr_buffer(layer_info); + default: + LOGGER__ERROR("Unsupported layer type for InternalBufferManager"); + return HAILO_INTERNAL_FAILURE; + } +} + +hailo_status InternalBufferManager::add_config_buffer_info(const uint16_t context_index, const size_t config_stream_index, + const std::vector &cfg_sizes) +{ + static const bool NO_REUSE = false; + static const auto SINGLE_TRANSFER_PER_BATCH = 1; + auto edge_layer_key = std::make_pair(static_cast(context_index), static_cast(MAX_EDGE_LAYERS_PER_CONTEXT + config_stream_index)); + const auto buffer_size = static_cast(std::accumulate(cfg_sizes.begin(), cfg_sizes.end(), 0)); + add_buffer_info(edge_layer_key, + EdgeLayerInfo{ + LayerType::CFG, + buffer_size, + SINGLE_TRANSFER_PER_BATCH, + context_index, + context_index, + NO_REUSE}); + + return HAILO_SUCCESS; +} + +Expected> InternalBufferManager::create_intermediate_sg_buffer( + const size_t buffer_size) +{ + auto buffer = vdma::SgBuffer::create(m_driver, buffer_size, HailoRTDriver::DmaDirection::BOTH); + CHECK_EXPECTED(buffer); + + auto buffer_ptr = make_shared_nothrow(buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(buffer_ptr)); +} + +Expected> InternalBufferManager::create_intermediate_ccb_buffer( + const size_t buffer_size) +{ + auto buffer = vdma::ContinuousBuffer::create(buffer_size, m_driver); + CHECK_EXPECTED_OUT_OF_CMA_MEMORY(buffer); + + auto buffer_ptr = make_shared_nothrow(buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(buffer_ptr)); +} + +Expected> InternalBufferManager::create_intermediate_buffer( + vdma::VdmaBuffer::Type &buffer_type, const size_t buffer_size) +{ + if (vdma::VdmaBuffer::Type::CONTINUOUS == buffer_type) { + return create_intermediate_ccb_buffer(buffer_size); + } + return create_intermediate_sg_buffer(buffer_size); +} + +void 
InternalBufferManager::print_execution_results(const BufferPlanReport &default_planner_report, + bool default_planner_meet_requirements, const BufferPlanReport &executed_buffers_report) +{ + if (!default_planner_meet_requirements) { + LOGGER__INFO("Default internal buffer planner failed to meet requirements"); + } else { + LOGGER__INFO("Planned internal buffer memory: CMA memory {}, user memory {}. Memory to edge layer usage factor is {}", + default_planner_report.cma_memory, default_planner_report.user_memory, default_planner_report.memory_utilization_factor); + } + + auto default_plan_executed = (default_planner_report.cma_memory == executed_buffers_report.cma_memory) && + (default_planner_report.user_memory == executed_buffers_report.user_memory); + + if (default_plan_executed) { + LOGGER__INFO("Default internal buffer planner executed successfully"); + } else { + LOGGER__INFO("Executed internal buffer memory: CMA memory {}, user memory {}. Memory to edge layer usage factor is {}", + executed_buffers_report.cma_memory, executed_buffers_report.user_memory, executed_buffers_report.memory_utilization_factor); + } +} + +hailo_status InternalBufferManager::plan_and_execute(InternalBufferPlanner::Type default_planner_type, + const size_t number_of_contexts) +{ + // Create buffer planning + auto planner_type = default_planner_type; + // Copy of the initial edge layers + auto edge_layers = m_edge_layer_infos; + // Vector of executed buffers from the planning + InternalBufferPlanning buffers_executed; + // Default planner report + BufferPlanReport default_planner_report {}; + bool default_planner_meet_requirements = false; + + while (!edge_layers.empty()) { + CHECK(InternalBufferPlanner::Type::INVALID != planner_type, HAILO_CANT_MEET_BUFFER_REQUIREMENTS, + "Cannot find an executable buffer planning for the given edge layers"); + + LOGGER__DEBUG("Trying to plan with planner type {}", static_cast(planner_type)); + auto buffer_planning_exp = InternalBufferPlanner::create_buffer_planning(edge_layers, planner_type, + m_driver.dma_type(), m_driver.desc_max_page_size(), number_of_contexts); + if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == buffer_planning_exp.status()) { + // If the planner failed, try the next planner + LOGGER__DEBUG("Can't plan with planner type {}", static_cast(planner_type)); + planner_type = static_cast((static_cast(planner_type)) + 1); + continue; + } + auto buffer_planning = buffer_planning_exp.release(); + + if (planner_type == default_planner_type) { + default_planner_meet_requirements = true; + default_planner_report = InternalBufferPlanner::report_planning_info(buffer_planning); + } + + std::vector edge_layers_executed; + auto status = execute_plan(buffer_planning, edge_layers_executed, buffers_executed); + // Don't return an error if out of host CMA memory; try the next plan. + if (HAILO_OUT_OF_HOST_CMA_MEMORY != status) { + CHECK_SUCCESS(status); + } + + // Remove executed edge layers from the remaining edge layers + for (const auto &edge_layer_key : edge_layers_executed) { + edge_layers.erase(edge_layer_key); + } + + if (!edge_layers.empty()) { + LOGGER__DEBUG("Execute of plan type {} didn't finish. Moving to next planner", static_cast(planner_type)); + } else { + LOGGER__DEBUG("Execute finished successfully"); + } + // Move to the next planner + planner_type = static_cast((static_cast(planner_type)) + 1); + }
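+ // [Editor's note - hedged summary] The fallback above simply walks the InternalBufferPlanner::Type
+ // enum in declaration order, so the escalation is:
+ //
+ //     SINGLE_BUFFER_PER_BUFFER_TYPE -> SINGLE_SG_BUFFER -> NAIVE_PER_BUFFER_TYPE -> NAIVE_SG_BUFFER -> INVALID
+ //
+ // Each round keeps whatever buffers the previous round managed to allocate and replans only the
+ // edge layers that are still left, which is why executed keys are erased before retrying.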
+ + const auto executed_buffers_report = InternalBufferPlanner::report_planning_info(buffers_executed); + + print_execution_results(default_planner_report, default_planner_meet_requirements, executed_buffers_report); + + return HAILO_SUCCESS; +} + +hailo_status InternalBufferManager::execute_plan(InternalBufferPlanning &buffer_planning, + std::vector &edge_layers_executed, InternalBufferPlanning &buffers_executed) +{ + // Verify no buffers were allocated yet + assert(m_edge_layer_to_buffer_map.empty()); + + auto execution_status = HAILO_SUCCESS; + + // Go over the plan and create the buffers + for (auto &buffer_plan : buffer_planning) { + auto buffer_ptr = create_intermediate_buffer(buffer_plan.buffer_type, buffer_plan.buffer_size); + if (buffer_ptr.status() == HAILO_OUT_OF_HOST_CMA_MEMORY) { + execution_status = buffer_ptr.status(); + // If one of the buffers failed due to lack of memory, try to move on to the next buffer. + continue; + } + for (const auto &edge_layer_offset : buffer_plan.edge_layer_offsets) { + m_edge_layer_to_buffer_map.emplace( + edge_layer_offset.first, + EdgeLayerToBufferMap{buffer_ptr.value(), edge_layer_offset.second}); + } + // Add the edge layers to the executed list + for (const auto &edge_layer_info : buffer_plan.edge_layer_infos) { + edge_layers_executed.emplace_back(edge_layer_info.first); + } + + // Add the buffer to the executed list + buffers_executed.emplace_back(buffer_plan); + } + + return execution_status; +} + +Expected InternalBufferManager::get_intermediate_buffer(const EdgeLayerKey &key) +{ + const auto buffer_it = m_edge_layer_to_buffer_map.find(key); + if (std::end(m_edge_layer_to_buffer_map) == buffer_it) { + return make_unexpected(HAILO_NOT_FOUND); + } + + return Expected(buffer_it->second); +} + +} /* namespace hailort */
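[Editor's note] Taken together, the new manager is driven in three phases: register edge layer info, plan and allocate, then query the resulting mappings. A minimal usage sketch (hypothetical call site; driver, config_params, layer_info, num_contexts, src_context and src_stream are assumed to exist, and error handling is elided):

    // Illustration only - not part of this patch.
    auto manager = InternalBufferManager::create(driver, config_params).release();
    auto status = manager->add_layer_buffer_info(layer_info);   // INTER_CONTEXT or DDR edge layer
    status = manager->plan_and_execute(InternalBufferPlanner::Type::SINGLE_BUFFER_PER_BUFFER_TYPE, num_contexts);
    auto mapping = manager->get_intermediate_buffer({src_context, src_stream});
    // mapping->buffer and mapping->offset locate this edge layer inside the shared buffer.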
diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp new file mode 100644 index 00000000..5a93200c --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp @@ -0,0 +1,76 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file internal_buffer_manager.hpp + * @brief Planner for all the internal buffers of the CoreOp + * + * The manager will hold all the internal buffers of the CoreOp. + * The manager can optimize the memory consumption of the core op and provide an API + * for reporting the total internal memory consumption. + * + **/ + +#ifndef _HAILO_INTERNAL_BUFFER_MANAGER_HPP_ +#define _HAILO_INTERNAL_BUFFER_MANAGER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/hef.hpp" +#include "common/utils.hpp" +#include "hef/layer_info.hpp" +#include "vdma/memory/vdma_buffer.hpp" +#include "internal_buffer_planner.hpp" + + +namespace hailort +{ + +#define MAX_EDGE_LAYERS_PER_CONTEXT (20) + +class InternalBufferManager final +{ +public: + static Expected> create(HailoRTDriver &driver, + const ConfigureNetworkParams &config_params); + + hailo_status add_config_buffer_info(const uint16_t context_index, const size_t config_stream_index, + const std::vector &cfg_sizes); + hailo_status add_layer_buffer_info(const LayerInfo &layer_info); + Expected get_intermediate_buffer(const EdgeLayerKey &key); + hailo_status plan_and_execute(InternalBufferPlanner::Type default_planner_type, const size_t number_of_contexts); +private: + + // Add buffer info phase functions + void add_buffer_info(const EdgeLayerKey &edge_layer_key, const EdgeLayerInfo &buffer_info); + hailo_status add_inter_context_buffer(const LayerInfo &layer_info); + hailo_status add_ddr_buffer(const LayerInfo &layer_info); + Expected get_network_batch_size(const std::string &network_name) const; + + // Execute phase functions + hailo_status execute_plan(InternalBufferPlanning &buffer_planning, + std::vector &edge_layers_executed, InternalBufferPlanning &buffers_executed); + Expected> create_intermediate_buffer( + vdma::VdmaBuffer::Type &buffer_type, const size_t buffer_size); + Expected> create_intermediate_ccb_buffer( + const size_t buffer_size); + Expected> create_intermediate_sg_buffer( + const size_t buffer_size); + + // Reporting functions + void print_execution_results(const BufferPlanReport &default_planner_report, + bool default_planner_meet_requirements, const BufferPlanReport &executed_buffers_report); + + HailoRTDriver &m_driver; + const ConfigureNetworkParams &m_config_params; + // m_edge_layer_infos is filled by the add_buffer_info API + std::map m_edge_layer_infos; + + std::map m_edge_layer_to_buffer_map; + + InternalBufferManager(HailoRTDriver &driver, const ConfigureNetworkParams &config_params); +}; + +} /* namespace hailort */ + +#endif /* _HAILO_INTERNAL_BUFFER_MANAGER_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp new file mode 100644 index 00000000..bf369481 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp @@ -0,0 +1,403 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file internal_buffer_planner.cpp + * @brief Planner for all the internal buffers of the CoreOp + * + * The planner holds the algorithms that plan the connections between buffers and edge layers + * + **/ + +#include "vdma/memory/buffer_requirements.hpp" +#include "internal_buffer_planner.hpp" + +#include + +constexpr size_t NAIVE_PLANNING_EDGE_LAYER_OFFSET = 0; + +// Macros that check status. If the status is HAILO_CANT_MEET_BUFFER_REQUIREMENTS, return without printing an error to the log.
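+// [Editor's note - usage sketch] These macros follow the codebase's Expected/status conventions;
+// a typical call site (as in create_naive_buffer_planning below) looks like:
+//
+//     const auto buffer_requirements = return_buffer_requirements(edge_layer_info, buffer_type, max_page_size);
+//     CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(buffer_requirements);  // silent propagation; the caller falls back
+//
+// so HAILO_CANT_MEET_BUFFER_REQUIREMENTS propagates quietly and plan_and_execute() can try the next planner.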
+#define CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(type) if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == type.status()) {return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS);} CHECK_SUCCESS(type); +#define CHECK_STATUS_CANT_MEET_REQUIREMENTS(status) if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == status) {return make_unexpected(status);} CHECK_SUCCESS(status); + +namespace hailort +{ + +bool InternalBufferPlanner::should_edge_layer_use_ccb(const LayerType &layer_type, HailoRTDriver::DmaType dma_type, + bool force_sg_buffer_type) +{ + if (HailoRTDriver::DmaType::PCIE == dma_type) { + // CCB not supported on PCIe + return false; + } + + if (force_sg_buffer_type) { + return false; + } + + switch (layer_type) { + case LayerType::INTER_CONTEXT: + // On burst (aka inter-context), because the buffers are big (And depends on the max_batch_size), we currently + // don't want to use CCB by default. + if (nullptr != std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) { + LOGGER__WARNING("Using desc instead of CCB for inter context channels is not optimal for performance."); + return false; + } else { + return true; + } + case LayerType::DDR: + // On circular_continuous (aka ddr), the buffers are relatively small and we want to verify the C2C mechanism, + // therefore the CCB is the default behaviour. + // Due to request from the DFC group (Memory issues) - DDR buffers would run over DESC and not CCB buffers. + if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_CCB")) { + LOGGER__WARNING("Using Non default buffer type (CCB instead of DESC) for ddr channel."); + return true; + } else { + return false; + } + case LayerType::CFG: + if (nullptr != std::getenv("HAILO_FORCE_CONF_CHANNEL_OVER_DESC")) { + LOGGER__WARNING("Using desc instead of CCB for config channel is not optimal for performance."); + return false; + } + else { + return true; + } + default: + // Shouldn't reach here + assert(false); + return false; + } +} + +Expected InternalBufferPlanner::create_naive_buffer_planning( + const std::map &edge_layer_infos, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size, bool force_sg_buffer_type) +{ + InternalBufferPlanning buffer_planning; + + // Sort edge layers by size - Start with the biggest buffer + auto sorted_edge_layer_vector = sort_edge_layers_by_size(edge_layer_infos); + for (const auto &edge_layer_info : sorted_edge_layer_vector) { + // Naive planning - Buffer holds only one transfer pattern and one edge layer + std::vector> edge_layer_offsets; + std::map plan_edge_layer_infos; + plan_edge_layer_infos.emplace(edge_layer_info.first, edge_layer_info.second); + edge_layer_offsets.emplace_back(edge_layer_info.first, NAIVE_PLANNING_EDGE_LAYER_OFFSET); + vdma::VdmaBuffer::Type buffer_type = should_edge_layer_use_ccb(edge_layer_info.second.type, dma_type, force_sg_buffer_type) ? 
+ vdma::VdmaBuffer::Type::CONTINUOUS : vdma::VdmaBuffer::Type::SCATTER_GATHER; + const auto buffer_requirements = return_buffer_requirements(edge_layer_info.second, buffer_type, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(buffer_requirements); + + buffer_planning.emplace_back( + BufferPlan{ + buffer_type, + buffer_requirements->buffer_size(), + buffer_requirements->buffer_size(), + edge_layer_offsets, + plan_edge_layer_infos}); + } + return buffer_planning; +} + +std::vector> InternalBufferPlanner::sort_edge_layers_by_size( + const std::map &edge_layers) +{ + std::vector> sorted_edge_layers; + std::copy(edge_layers.begin(), edge_layers.end(), std::back_inserter>>(sorted_edge_layers)); + std::sort(sorted_edge_layers.begin(), sorted_edge_layers.end(), + [](const std::pair &a, const std::pair &b) { + return a.second.transfer_size > b.second.transfer_size; + }); + return sorted_edge_layers; +} + +Expected InternalBufferPlanner::return_buffer_requirements(const EdgeLayerInfo &edge_layer, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size) +{ + // Calc actual size + static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; + static const auto FORCE_BATCH_SIZE = true; + static const auto IS_VDMA_ALIGNED_BUFFER = true; + const auto is_circular = (LayerType::DDR == edge_layer.type); + auto buffer_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + buffer_type, max_page_size, edge_layer.max_transfers_in_batch, + edge_layer.max_transfers_in_batch, edge_layer.transfer_size, is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, + FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + return buffer_requirements; +} + +ContextBufferUsageSegments InternalBufferPlanner::merge_context_buffer_events( + ContextBufferUsageSegments& combined, const ContextBufferUsageSegments& added_buffers) +{ + // Combine the two vectors into one + combined.insert(combined.end(), added_buffers.begin(), added_buffers.end()); + + // Sort the combined vector by offset + std::sort(combined.begin(), combined.end(), [](const BufferUsageSegment& a, const BufferUsageSegment& b) { + return a.offset < b.offset; + }); + + // Merge overlapping buffers + ContextBufferUsageSegments merged; + for (const auto& buffer : combined) { + if (!merged.empty() && (merged.back().offset + merged.back().size >= buffer.offset)) { + // If the current buffer overlaps with the last buffer in the merged list, + // extend the size of the last buffer to include the current buffer + merged.back().size = std::max(merged.back().size, buffer.offset + buffer.size - merged.back().offset); + } else { + // If the current buffer does not overlap with the last buffer in the merged list, + // add it to the list + merged.push_back(buffer); + } + } + + return merged; +} + +size_t InternalBufferPlanner::find_new_buffer_offset(const ContextBufferUsageSegments& unified_buffers, size_t new_buffer_size, + uint16_t buffer_offset_alignment) +{ + // Try to find a gap in the list that is large enough to hold the new buffer + // If first buffer starts after 0, check the gap at the beginning of the list + const auto aligned_first_buffer_offset = + !unified_buffers.empty() ? (DIV_ROUND_DOWN(unified_buffers[0].offset, buffer_offset_alignment) * buffer_offset_alignment) : 0; + if (!unified_buffers.empty() && aligned_first_buffer_offset >= new_buffer_size) { + return 0; + } + + const auto max_size = unified_buffers.empty() ? 
0 : unified_buffers.back().offset + unified_buffers.back().size; + const auto aligned_max_size = DIV_ROUND_UP(max_size, buffer_offset_alignment) * buffer_offset_alignment; + for (auto it = unified_buffers.begin(); it != unified_buffers.end(); ++it) { + const auto aligned_end_of_buffer = DIV_ROUND_UP((it->offset + it->size), buffer_offset_alignment) * buffer_offset_alignment; + // Calculate the gap between the current buffer and the next buffer + size_t gap = ((it + 1 != unified_buffers.end()) ? ((it + 1)->offset) : (max_size)) - aligned_end_of_buffer; + + // If the gap is large enough to hold the new buffer, insert the new buffer there + if (gap >= new_buffer_size) { + return aligned_end_of_buffer; + } + } + + // If no suitable gap was found, add the new buffer to the end of the list (but aligned to page size). + return aligned_max_size; +} + +std::vector InternalBufferPlanner::build_availibility_map( + const std::vector &context_buffer_usage_vector, uint16_t start_context, uint16_t end_context) +{ + // Start with empty event vector + std::vector unified_buffer_events = {}; + for (size_t context_index = start_context; context_index <= end_context; context_index++) { + unified_buffer_events = merge_context_buffer_events(unified_buffer_events, context_buffer_usage_vector[context_index]); + } + + return unified_buffer_events; +} + +void update_buffer_to_context_map(std::vector> &context_buffer_usage_vector, + uint16_t start_context, uint16_t end_context, size_t buffer_offset, size_t buffer_size) +{ + // Don't have to sort here. Only the combined vector needs to be sorted. + for (uint16_t context_index = start_context; context_index <= end_context; context_index++) { + context_buffer_usage_vector[context_index].emplace_back(BufferUsageSegment{buffer_offset, buffer_size}); + } +} + +hailo_status InternalBufferPlanner::add_edge_layer_to_planning( + const std::pair &edge_layer, + std::vector> &context_buffer_usage_vector, BufferPlan &buffer_plan, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size) +{ + const auto buffer_requirements = return_buffer_requirements(edge_layer.second, buffer_type, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(buffer_requirements); + + // Check if there is enough space in the current context buffer. 
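+ // [Editor's note - worked example, values assumed] With DIV_ROUND_DOWN/DIV_ROUND_UP as the usual
+ // integer rounding helpers, suppose segments {offset 0, size 4096} and {offset 8192, size 4096}
+ // are already in use across these contexts and a new 2048-byte edge layer needs a 512-byte
+ // alignment: the aligned end of the first segment is 4096, the gap up to 8192 is 4096 >= 2048,
+ // so the call to find_new_buffer_offset() below would return 4096 and no new memory is appended.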
+ const auto start_context = edge_layer.second.start_context; + const auto end_context = edge_layer.second.end_context; + const auto buffer_map = build_availibility_map(context_buffer_usage_vector, start_context, end_context); + + const auto edge_layer_size = buffer_requirements->buffer_size(); + const auto buffer_offset_alignment = buffer_requirements->desc_page_size(); + const auto buffer_offset = find_new_buffer_offset(buffer_map, edge_layer_size, buffer_offset_alignment); + + auto end_of_edge_layer_offset = buffer_offset + edge_layer_size; + // Update the buffer size if needed + buffer_plan.buffer_size = std::max(end_of_edge_layer_offset, buffer_plan.buffer_size); + // Update the total edge layer size + buffer_plan.total_edge_layer_size += edge_layer_size; + + // Add the buffer to the buffer plan + buffer_plan.edge_layer_offsets.emplace_back(edge_layer.first, buffer_offset); + buffer_plan.edge_layer_infos.emplace(edge_layer.first, edge_layer.second); + + update_buffer_to_context_map(context_buffer_usage_vector, start_context, end_context, buffer_offset, edge_layer_size); + + LOGGER__DEBUG("Added edge layer key {}:{} with size {} from context {} to context {} to offset {}", + edge_layer.first.first, edge_layer.first.second, edge_layer_size, start_context, end_context, buffer_offset); + + return HAILO_SUCCESS; +} + +Expected InternalBufferPlanner::create_single_buffer_planning( + const std::map &sg_edge_layers, size_t number_of_contexts, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size) +{ + InternalBufferPlanning buffer_planning; + // Trying to reserve one buffer only. + buffer_planning.reserve(1); + // Allocate a plan for one buffer + BufferPlan buffer_plan; + // Set the requested buffer type (SG or CCB) + buffer_plan.buffer_type = buffer_type; + // Init the buffer with size 0 + buffer_plan.buffer_size = 0; + buffer_plan.total_edge_layer_size = 0; + + auto sorted_edge_layer_vector = sort_edge_layers_by_size(sg_edge_layers); + std::vector> context_buffer_usage_vector(number_of_contexts); + + for (auto &edge_layer : sorted_edge_layer_vector) { + auto status = add_edge_layer_to_planning(edge_layer, context_buffer_usage_vector, buffer_plan, buffer_type, max_page_size); + CHECK_STATUS_CANT_MEET_REQUIREMENTS(status); + } + + // Update the buffer planning + buffer_planning.emplace_back(buffer_plan); + + return buffer_planning; +} + +Expected InternalBufferPlanner::create_optimized_buffer_planning( + const std::map &edge_layer_infos, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size, size_t number_of_contexts, bool force_sg_buffer_type) +{ + std::map ccb_edge_layers; + std::map sg_edge_layers; + + // First - split between CCB and SG buffers + for (const auto &edge_layer_info : edge_layer_infos) { + if (should_edge_layer_use_ccb(edge_layer_info.second.type, dma_type, force_sg_buffer_type)) { + ccb_edge_layers.emplace(edge_layer_info.first, edge_layer_info.second); + } else { + sg_edge_layers.emplace(edge_layer_info.first, edge_layer_info.second); + } + } + + InternalBufferPlanning buffer_planning; + // Second - create buffer planning for each buffer type + if (!ccb_edge_layers.empty()) { + auto ccb_buffer_planning = + create_single_buffer_planning(ccb_edge_layers, number_of_contexts, vdma::VdmaBuffer::Type::CONTINUOUS, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(ccb_buffer_planning); + buffer_planning.insert(buffer_planning.end(), ccb_buffer_planning->begin(), ccb_buffer_planning->end()); + }
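+ // [Editor's note - hedged explanation] Packing each buffer type into a single buffer lets edge
+ // layers whose context lifetimes do not overlap share bytes: add_edge_layer_to_planning() only
+ // consults the availability map for contexts [start_context, end_context], so, for example, an
+ // edge layer alive in contexts [0, 2] and another alive in [3, 5] may be placed at the same offset.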
+ + if (!sg_edge_layers.empty()) { + auto sg_buffer_planning = + create_single_buffer_planning(sg_edge_layers, number_of_contexts, vdma::VdmaBuffer::Type::SCATTER_GATHER, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(sg_buffer_planning); + buffer_planning.insert(buffer_planning.end(), sg_buffer_planning->begin(), sg_buffer_planning->end()); + } + + return buffer_planning; +} + +Expected InternalBufferPlanner::create_buffer_planning( + const std::map &edge_layer_infos, Type plan_type, + HailoRTDriver::DmaType dma_type, uint16_t max_page_size, size_t number_of_contexts) +{ + static const bool FORCE_SG_BUFFER_TYPE = true; + // Force the plan by user flag + if (nullptr != std::getenv("HAILO_FORCE_NAIVE_PER_BUFFER_TYPE_ALOCATION")) { + LOGGER__INFO("Forced buffer planning of type 'NAIVE_PER_BUFFER_TYPE'."); + plan_type = Type::NAIVE_PER_BUFFER_TYPE; + } + + switch (plan_type) { + case Type::SINGLE_BUFFER_PER_BUFFER_TYPE: + return create_optimized_buffer_planning(edge_layer_infos, dma_type, max_page_size, number_of_contexts); + case Type::SINGLE_SG_BUFFER: + return create_optimized_buffer_planning(edge_layer_infos, dma_type, max_page_size, number_of_contexts, FORCE_SG_BUFFER_TYPE); + case Type::NAIVE_PER_BUFFER_TYPE: + return create_naive_buffer_planning(edge_layer_infos, dma_type, max_page_size); + case Type::NAIVE_SG_BUFFER: + return create_naive_buffer_planning(edge_layer_infos, dma_type, max_page_size, FORCE_SG_BUFFER_TYPE); + default: + return make_unexpected(HAILO_INVALID_ARGUMENT); + } +} + +BufferPlanReport InternalBufferPlanner::report_planning_info(const InternalBufferPlanning &buffer_planning) +{ + BufferPlanReport report = {}; + report.cma_memory = 0; + report.user_memory = 0; + report.edge_layer_size = 0; + + for (const auto &buffer_plan : buffer_planning) { + if (vdma::VdmaBuffer::Type::CONTINUOUS == buffer_plan.buffer_type) { + report.cma_memory += buffer_plan.buffer_size; + } else { + report.user_memory += buffer_plan.buffer_size; + } + report.edge_layer_size += buffer_plan.total_edge_layer_size; + } + + report.memory_utilization_factor = (report.edge_layer_size > 0) ? + (static_cast(report.cma_memory + report.user_memory) / static_cast(report.edge_layer_size)) : 1; + + return report; +}
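+// [Editor's note - worked example, values assumed] With one CONTINUOUS plan of 2 MiB and one
+// SCATTER_GATHER plan of 6 MiB serving 10 MiB worth of edge layers, the report holds
+// cma_memory = 2 MiB, user_memory = 6 MiB and memory_utilization_factor = (2 + 6) / 10 = 0.8.
+// A factor below 1 means lifetime-based reuse packed the edge layers into less memory than their
+// combined size; the naive planners always come out at exactly 1.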
+ +Expected InternalBufferPlanner::get_edge_info_from_buffer_plan(const InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key) +{ + for (const auto &buffer_plan : buffer_planning) { + auto it = buffer_plan.edge_layer_infos.find(edge_layer_key); + if (it != buffer_plan.edge_layer_infos.end()) { + return Expected(it->second); + } + } + return make_unexpected(HAILO_NOT_FOUND); +} + +hailo_status InternalBufferPlanner::change_edge_layer_buffer_offset(InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key, size_t new_offset, uint16_t max_page_size) +{ + TRY(auto edge_layer_info, get_edge_info_from_buffer_plan(buffer_planning, edge_layer_key)); + for (auto &buffer_plan : buffer_planning) { + const auto buffer_requirements = return_buffer_requirements(edge_layer_info, buffer_plan.buffer_type, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(buffer_requirements); + + for (auto &edge_layer_offset : buffer_plan.edge_layer_offsets) { + if (edge_layer_offset.first == edge_layer_key) { + edge_layer_offset.second = new_offset; + if (edge_layer_offset.second + buffer_requirements->buffer_size() > buffer_plan.buffer_size) { + buffer_plan.buffer_size = edge_layer_offset.second + buffer_requirements->buffer_size(); + } + return HAILO_SUCCESS; + } + } + } + return HAILO_INVALID_ARGUMENT; +} + +Expected InternalBufferPlanner::get_edge_layer_buffer_offset(const InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key) +{ + for (auto &buffer_plan : buffer_planning) { + auto it = buffer_plan.edge_layer_offsets.begin(); + while (it != buffer_plan.edge_layer_offsets.end()) { + if (it->first == edge_layer_key) { + return Expected(it->second); + } + it++; + } + } + return make_unexpected(HAILO_NOT_FOUND); +} + + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp new file mode 100644 index 00000000..2e1a6508 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp @@ -0,0 +1,135 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file internal_buffer_planner.hpp + * @brief Planner for all the internal buffers of the CoreOp + * + * The planner holds the algorithms that plan the connections between buffers and edge layers.
+ * + **/ + +#ifndef _HAILO_INTERNAL_BUFFER_PLANNER_HPP_ +#define _HAILO_INTERNAL_BUFFER_PLANNER_HPP_ + +#include "hailo/hef.hpp" +#include "common/utils.hpp" +#include "hef/layer_info.hpp" +#include "vdma/memory/vdma_buffer.hpp" +#include "vdma/memory/buffer_requirements.hpp" + +namespace hailort +{ + +using EdgeLayerKey = std::pair; + +struct EdgeLayerInfo { + LayerType type; + uint32_t transfer_size; + uint16_t max_transfers_in_batch; + uint16_t start_context; + uint16_t end_context; + bool reuse_buffer; +}; + +struct EdgeLayerToBufferMap { + std::shared_ptr buffer; + size_t offset; +}; + +struct BufferPlan { + vdma::VdmaBuffer::Type buffer_type; + size_t buffer_size; + size_t total_edge_layer_size; + std::vector> edge_layer_offsets; + std::map edge_layer_infos; +}; + +struct BufferPlanReport { + size_t cma_memory; + size_t user_memory; + size_t edge_layer_size; + float memory_utilization_factor; +}; + +using InternalBufferPlanning = std::vector; + + +// BufferUsageSegment is a struct that represents a segment of a buffer that is used in a specific context +typedef struct { + size_t offset; + size_t size; +} BufferUsageSegment; + +// ContextBufferUsageSegments represents all buffer segments that is used in a specific context +using ContextBufferUsageSegments = std::vector; + +class InternalBufferPlanner final +{ +public: + + enum class Type { + SINGLE_BUFFER_PER_BUFFER_TYPE = 0, + SINGLE_SG_BUFFER, + NAIVE_PER_BUFFER_TYPE, + NAIVE_SG_BUFFER, + + // Must be last + INVALID, + }; + + // Planning functions + static Expected create_buffer_planning( + const std::map &edge_layer_infos, Type plan_type, + HailoRTDriver::DmaType dma_type, uint16_t max_page_size, size_t number_of_contexts); + static Expected create_naive_buffer_planning( + const std::map &edge_layer_infos, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size, bool force_sg_type_buffer = false); + static Expected create_optimized_buffer_planning( + const std::map &edge_layer_infos, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size, size_t number_of_contexts, bool force_sg_type_buffer = false); + // Reporting functions + static BufferPlanReport report_planning_info(const InternalBufferPlanning &buffer_planning); + + // Debug API + static hailo_status change_edge_layer_buffer_offset(InternalBufferPlanning &buffer_planning, const EdgeLayerKey &edge_layer_key, + size_t new_offset, uint16_t max_page_size); + static Expected get_edge_layer_buffer_offset(const InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key); + +private: + + // Helper functions + static bool should_edge_layer_use_ccb(const LayerType &layer_type, HailoRTDriver::DmaType dma_type, + bool force_sg_type_buffer); + static std::vector> sort_edge_layers_by_size( + const std::map &edge_layers); + static Expected return_buffer_requirements( + const EdgeLayerInfo &edge_layer, const vdma::VdmaBuffer::Type buffer_type, + uint16_t max_page_size); + static Expected get_edge_info_from_buffer_plan(const InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key); + + // Planning phase functions + static ContextBufferUsageSegments merge_context_buffer_events( + ContextBufferUsageSegments& combined, const ContextBufferUsageSegments& added_buffers); + static size_t find_new_buffer_offset(const ContextBufferUsageSegments& unified_buffers, size_t new_buffer_size, + uint16_t buffer_offset_alignment); + static std::vector build_availibility_map( + const std::vector &context_buffer_usage_vector, uint16_t start_context, uint16_t 
end_context); + static hailo_status add_edge_layer_to_planning(const std::pair &edge_layer, + std::vector> &context_buffer_usage_vector, BufferPlan &buffer_plan, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size); + + + static Expected create_single_buffer_planning( + const std::map &sg_edge_layers, size_t number_of_contexts, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size); +}; + +} /* namespace hailort */ + +#endif /* _HAILO_INTERNAL_BUFFER_PLANNER_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp index 29846f93..b35878b6 100644 --- a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp @@ -8,7 +8,7 @@ **/ #include "periph_calculator.hpp" -#include "device_common/device_internal.hpp" +#include "hef/hef_internal.hpp" namespace hailort { @@ -70,7 +70,7 @@ uint32_t PeriphCalculator::calculate_ddr_periph_buffers_per_frame(const LayerInf Expected PeriphCalculator::calculate_periph_registers_impl(const LayerInfo &layer_info, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, const bool is_core_hw_padding_config_in_dfc, - const ProtoHEFHwArch &hw_arch) + const HEFHwArch &hw_arch) { // Calculate periph according to the hw shape - the shape the core is expecting to get const hailo_3d_image_shape_t& periph_shape = layer_info.hw_shape; @@ -84,18 +84,22 @@ Expected PeriphCalculator::calculate_periph_registers_impl(const Laye const auto row_size = static_cast(periph_shape.width * periph_shape.features * layer_info.hw_data_bytes); auto periph_frame_size = periph_shape.height * row_size; - // In case of core hw padding in DFC extension - hw shape might not be aligned - use aligned frame size and + CHECK_AS_EXPECTED(desc_page_size < layer_info.max_shmifo_size, HAILO_INVALID_ARGUMENT, + "Cannot find possible periph buffer size solution since desc_page_size ({}) is equal or larger than max stream size ({}) for layer name {}", + desc_page_size, layer_info.max_shmifo_size, layer_info.name); + + // In case of core hw padding in DFC extension - hw shape might not be aligned - use aligned frame size and + // configured periph registers will add / remove the extra padding if (is_core_hw_padding_config_in_dfc) { if (0 != (periph_frame_size % PERIPH_FRAME_ALIGNMENT)) { auto max_periph_padding_payload = HefConfigurator::max_periph_padding_payload_value( DeviceBase::hef_arch_to_device_arch(hw_arch)); CHECK_EXPECTED(max_periph_padding_payload); - + // Currently the case of a payload larger than the max periph padding payload value is not supported CHECK_AS_EXPECTED(max_periph_padding_payload.value() > periph_frame_size, HAILO_INVALID_HEF, "Error, padded frame size larger than {}. Currently not supported", max_periph_padding_payload.value()); - + const auto padded_periph_frame_size = HailoRTCommon::align_to(periph_frame_size, static_cast(PERIPH_FRAME_ALIGNMENT)); // Configure periph padding registers @@ -116,7 +120,7 @@ Expected PeriphCalculator::calculate_periph_registers_impl(const Laye } CHECK_AS_EXPECTED(0 != periph_bytes_per_buffer, HAILO_INVALID_ARGUMENT, "Error, Could not find valid periph bytes per buffer value"); - + // In ddr - the core makes sure that row size is aligned to PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE but if a row + // Is too large to fit in core bytes per buffer - they will divide it and put it in multiple buffers - so in order to + // Get the exact size in
periph buffers per frame - we must multiply core registers and divide by periph bytes per buffer @@ -131,7 +135,7 @@ Expected PeriphCalculator::calculate_periph_registers_impl(const Laye } Expected PeriphCalculator::calculate_periph_registers(const LayerInfo &layer_info, - const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const ProtoHEFHwArch &hw_arch, + const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const HEFHwArch &hw_arch, const bool is_core_hw_padding_config_in_dfc) { auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch)); diff --git a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp index bfa487bb..5ca09112 100644 --- a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp @@ -13,7 +13,8 @@ #include "common/utils.hpp" #include "hailo/hailort_common.hpp" -#include "hef/hef_internal.hpp" +#include "hef/layer_info.hpp" +#include "device_common/device_internal.hpp" namespace hailort { @@ -23,7 +24,7 @@ static const uint64_t PERIPH_FRAME_ALIGNMENT = 8; class PeriphCalculator { public: static Expected calculate_periph_registers(const LayerInfo &layer_info, - const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const ProtoHEFHwArch &hw_arch, + const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const HEFHwArch &hw_arch, const bool is_core_hw_padding_config_in_dfc); private: static bool is_valid_periph_bytes_value(const uint32_t periph_bytes_per_buffer, const uint32_t hw_frame_size, @@ -32,7 +33,7 @@ class PeriphCalculator { static Expected calculate_nms_periph_registers(const LayerInfo &layer_info); static Expected calculate_periph_registers_impl(const LayerInfo &layer_info, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, - const bool is_core_hw_padding_config_in_dfc, const ProtoHEFHwArch &hw_arch); + const bool is_core_hw_padding_config_in_dfc, const HEFHwArch &hw_arch); static uint32_t calculate_ddr_periph_buffers_per_frame(const LayerInfo &layer_info, const uint32_t periph_bytes_per_buffer); diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp index b1b74457..ab1aaecf 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp @@ -4,20 +4,22 @@ #include "vdma/channel/boundary_channel.hpp" #include "vdma/memory/buffer_requirements.hpp" #include "device_common/control.hpp" +#include "core_op/resource_manager/internal_buffer_manager.hpp" #include #define HAILO15H_NMS_MAX_CLASSES (1024) +#define MAX_NUM_CONTEXTS_FOR_CONTROL_BUILDER (64) namespace hailort { Expected ContextResources::create(HailoRTDriver &driver, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, const std::vector &config_channels_ids, - const ConfigBufferInfoMap &config_buffer_infos) + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, + const std::vector &config_channels_ids, const ConfigBufferInfoMap &config_buffer_infos, + std::shared_ptr internal_buffer_manager) { CHECK_AS_EXPECTED(context_type < CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_COUNT, HAILO_INVALID_ARGUMENT); - CHECK_AS_EXPECTED(config_buffer_infos.size() 
<= config_channels_ids.size(), HAILO_INTERNAL_FAILURE, "config_buffer_infos size ({}) is bigger than config_channels_id count ({})", config_buffer_infos.size(), config_channels_ids.size()); @@ -29,19 +31,11 @@ Expected ContextResources::create(HailoRTDriver &driver, config_buffer_infos.at(config_stream_index)); CHECK_EXPECTED(buffer_resource); config_buffers.emplace_back(buffer_resource.release()); - } - - return ContextResources(driver, context_type, std::move(config_buffers)); -} -const std::vector &ContextResources::get_controls() const -{ - return m_builder.get_controls(); -} + internal_buffer_manager->add_config_buffer_info(context_index, config_stream_index, config_buffer_infos.at(config_stream_index)); + } -ContextSwitchBufferBuilder &ContextResources::builder() -{ - return m_builder; + return ContextResources(driver, context_type, std::move(config_buffers), internal_buffer_manager); } hailo_status ContextResources::add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, @@ -145,7 +139,7 @@ hailo_status ContextResources::validate_edge_layer(const LayerInfo &layer_info, // In Activation Context it is ok to have multiple edge layers with same stream index seeing as they could be for // Different contexts etc... - if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION != m_builder.get_context_type()) { + if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION != get_context_type()) { if (edge_layer.layer_info.stream_index == layer_info.stream_index) { // Validate that the amount of edge layers with the same stream index per context is 2 (And with opposite directions) // In the case of dual direction supported feature - otherwise 1 @@ -185,14 +179,18 @@ static Expected create_hw_latency_meter(const std::vector 1) { - LOGGER__WARNING("HW Latency measurement is supported on networks with a single input"); + LOGGER__WARNING("HW Latency measurement is supported on networks with a single input. 
the model has {} physical inputs.", + h2d_streams_count); return make_unexpected(HAILO_INVALID_OPERATION); } @@ -242,13 +240,20 @@ Expected ResourcesManager::create(VdmaDevice &vdma_device, Hai config_channels_ids.push_back(channel_id.release()); } + auto internal_buffer_manager = InternalBufferManager::create(driver, config_params); + CHECK_EXPECTED(internal_buffer_manager); + + auto action_list_buffer_builder = create_action_list_buffer_builder(core_op_metadata->dynamic_contexts().size(), + driver); + CHECK_EXPECTED(action_list_buffer_builder); + auto network_index_map = core_op_metadata->get_network_names(); auto latency_meters = create_latency_meters_from_config_params(config_params, core_op_metadata); CHECK_EXPECTED(latency_meters); ResourcesManager resources_manager(vdma_device, driver, std::move(allocator), config_params, - std::move(core_op_metadata), core_op_index, - std::move(network_index_map), latency_meters.release(), std::move(config_channels_ids)); + std::move(core_op_metadata), core_op_index, std::move(network_index_map), latency_meters.release(), + std::move(config_channels_ids), internal_buffer_manager.release(), action_list_buffer_builder.release()); return resources_manager; } @@ -258,7 +263,9 @@ ResourcesManager::ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &drive std::shared_ptr &&core_op_metadata, uint8_t core_op_index, const std::vector &&network_index_map, LatencyMetersMap &&latency_meters, - std::vector &&config_channels_ids) : + std::vector &&config_channels_ids, + std::shared_ptr internal_buffer_manager, + std::shared_ptr &&action_list_buffer_builder) : m_contexts_resources(), m_channel_allocator(std::move(channel_allocator)), m_vdma_device(vdma_device), @@ -273,8 +280,11 @@ ResourcesManager::ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &drive m_latency_meters(std::move(latency_meters)), m_boundary_channels(), m_is_configured(false), + m_is_activated(false), m_config_channels_ids(std::move(config_channels_ids)), - m_hw_only_boundary_buffers() + m_hw_only_boundary_buffers(), + m_internal_buffer_manager(std::move(internal_buffer_manager)), + m_action_list_buffer_builder(std::move(action_list_buffer_builder)) {} ResourcesManager::ResourcesManager(ResourcesManager &&other) noexcept : @@ -286,19 +296,24 @@ ResourcesManager::ResourcesManager(ResourcesManager &&other) noexcept : m_intermediate_buffers(std::move(other.m_intermediate_buffers)), m_core_op_metadata(std::move(other.m_core_op_metadata)), m_core_op_index(other.m_core_op_index), - m_dynamic_context_count(std::exchange(other.m_dynamic_context_count, static_cast(0))), - m_total_context_count(std::exchange(other.m_total_context_count, static_cast(0))), + m_dynamic_context_count(std::exchange(other.m_dynamic_context_count, static_cast(0))), + m_total_context_count(std::exchange(other.m_total_context_count, static_cast(0))), m_network_index_map(std::move(other.m_network_index_map)), m_latency_meters(std::move(other.m_latency_meters)), m_boundary_channels(std::move(other.m_boundary_channels)), m_is_configured(std::exchange(other.m_is_configured, false)), + m_is_activated(std::exchange(other.m_is_activated, false)), m_config_channels_ids(std::move(other.m_config_channels_ids)), - m_hw_only_boundary_buffers(std::move(other.m_hw_only_boundary_buffers)) + m_hw_only_boundary_buffers(std::move(other.m_hw_only_boundary_buffers)), + m_internal_buffer_manager(std::move(other.m_internal_buffer_manager)), + m_action_list_buffer_builder(std::move(other.m_action_list_buffer_builder)) {} hailo_status 
ResourcesManager::fill_infer_features(CONTROL_PROTOCOL__application_header_t &app_header) { app_header.infer_features.preliminary_run_asap = m_core_op_metadata->supported_features().preliminary_run_asap; + app_header.infer_features.batch_register_config = m_core_op_metadata->supported_features().batch_register_config; + app_header.infer_features.can_fast_batch_switch = m_core_op_metadata->get_can_fast_batch_switch(); return HAILO_SUCCESS; } @@ -343,7 +358,7 @@ hailo_status ResourcesManager::fill_network_batch_size(CONTROL_PROTOCOL__applica hailo_status ResourcesManager::fill_csm_buffer_size(CONTROL_PROTOCOL__application_header_t &app_header) { // All config buffers on the same platform will have the same desc_page_size - because it is derived from the host - app_header.csm_buffer_size = std::min(m_driver.desc_max_page_size(), vdma::DEFAULT_DESC_PAGE_SIZE); + app_header.csm_buffer_size = std::min(m_driver.desc_max_page_size(), vdma::DEFAULT_SG_PAGE_SIZE); return HAILO_SUCCESS; } @@ -364,6 +379,11 @@ void ResourcesManager::process_interrupts(IrqData &&irq_data) continue; } + if (!channel_irq_data.validation_success) { + LOGGER__CRITICAL("Got validation error on channel {}", channel_irq_data.channel_id); + continue; + } + if (!channel_irq_data.is_active) { LOGGER__CRITICAL("Channel {} was aborted by external source", channel_irq_data.channel_id); continue; @@ -371,7 +391,7 @@ void ResourcesManager::process_interrupts(IrqData &&irq_data) auto status = boundary_channel->second->trigger_channel_completion(channel_irq_data.desc_num_processed); if ((status != HAILO_SUCCESS) && - (status != HAILO_STREAM_ABORTED_BY_USER) && + (status != HAILO_STREAM_ABORT) && (status != HAILO_STREAM_NOT_ACTIVATED)) { // Log error and continue gracefully to process other interrupts LOGGER__ERROR("Trigger channel completion failed on channel {} with status {}", channel_irq_data.channel_id, status); @@ -449,16 +469,21 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay // TODO - remove this WA after HRT-11747 const uint16_t max_page_size = (m_driver.desc_max_page_size() == layer_info.max_shmifo_size) ? (m_driver.desc_max_page_size() / 2) : m_driver.desc_max_page_size(); - auto buffer_sizes_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( - max_page_size, static_cast(min_active_trans), static_cast(max_active_trans), - transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE, DONT_FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + auto buffer_sizes_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::SCATTER_GATHER, max_page_size, static_cast(min_active_trans), + static_cast(max_active_trans), transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE, + DONT_FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == buffer_sizes_requirements.status()) { + LOGGER__ERROR("Network shapes and batch size exceeds driver descriptors capabilities. " + "(A common cause for this error could be the batch size - which is {}).", network_batch_size.value()); + } CHECK_EXPECTED_AS_STATUS(buffer_sizes_requirements); const auto page_size = buffer_sizes_requirements->desc_page_size(); const auto descs_count = (nullptr != std::getenv("HAILO_CONFIGURE_FOR_HW_INFER")) ? 
- MAX_DESCS_COUNT : buffer_sizes_requirements->descs_count(); + MAX_SG_DESCS_COUNT : buffer_sizes_requirements->descs_count(); - auto channel = vdma::BoundaryChannel::create(channel_id.value(), channel_direction, m_vdma_device, descs_count, + auto channel = vdma::BoundaryChannel::create(channel_id.value(), channel_direction, m_driver, descs_count, page_size, layer_info.name, latency_meter); CHECK_EXPECTED_AS_STATUS(channel); @@ -493,16 +518,19 @@ hailo_power_mode_t ResourcesManager::get_power_mode() const return m_config_params.power_mode; } -ExpectedRef ResourcesManager::create_intermediate_buffer(uint32_t transfer_size, - uint16_t batch_size, uint8_t src_stream_index, uint8_t src_context_index, +ExpectedRef ResourcesManager::create_intermediate_buffer( + uint32_t transfer_size, uint16_t batch_size, uint8_t src_stream_index, uint16_t src_context_index, vdma::ChannelId d2h_channel_id, IntermediateBuffer::StreamingType streaming_type) { - auto buffer = IntermediateBuffer::create(m_driver, transfer_size, batch_size, d2h_channel_id, - streaming_type); - CHECK_EXPECTED(buffer); + auto edge_layer_key = std::make_pair(src_context_index, src_stream_index); + TRY(auto buffer_info, m_internal_buffer_manager->get_intermediate_buffer(edge_layer_key)); + + auto intermediate_buffer = IntermediateBuffer::create(m_driver, transfer_size, batch_size, d2h_channel_id, + streaming_type, std::move(buffer_info.buffer), buffer_info.offset); + CHECK_EXPECTED(intermediate_buffer); const auto key = std::make_pair(src_context_index, src_stream_index); - auto emplace_res = m_intermediate_buffers.emplace(key, buffer.release()); + auto emplace_res = m_intermediate_buffers.emplace(key, intermediate_buffer.release()); return std::ref(emplace_res.first->second); } @@ -530,20 +558,27 @@ Expected ResourcesManager::get_control_c status = fill_csm_buffer_size(app_header); CHECK_SUCCESS_AS_EXPECTED(status, "Invalid csm buffer size"); + const auto mapped_addr = get_action_list_buffer_builder()->get_mapped_buffer_dma_address(); + CHECK(IS_FIT_IN_UINT32(mapped_addr), HAILO_INVALID_ARGUMENT, "Invalid Mapped Address {} must fit in uint32", + mapped_addr); + app_header.external_action_list_address = static_cast(mapped_addr); + return app_header; } -Expected> ResourcesManager::add_new_context(CONTROL_PROTOCOL__context_switch_context_type_t type, +Expected> ResourcesManager::add_new_context( + CONTROL_PROTOCOL__context_switch_context_type_t context_type, const uint16_t context_index, const ConfigBufferInfoMap &config_info) { - CHECK_AS_EXPECTED(m_total_context_count < std::numeric_limits::max(), HAILO_INVALID_CONTEXT_COUNT); + CHECK_AS_EXPECTED(m_total_context_count < std::numeric_limits::max(), HAILO_INVALID_CONTEXT_COUNT); - auto context_resources = ContextResources::create(m_driver, type, m_config_channels_ids, config_info); + auto context_resources = ContextResources::create(m_driver, context_type, context_index, + m_config_channels_ids, config_info, m_internal_buffer_manager); CHECK_EXPECTED(context_resources); m_contexts_resources.emplace_back(context_resources.release()); m_total_context_count++; - if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC == type) { + if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC == context_type) { m_dynamic_context_count++; } @@ -609,8 +644,11 @@ hailo_status ResourcesManager::configure() auto status = Control::context_switch_set_network_group_header(m_vdma_device, core_op_header.release()); CHECK_SUCCESS(status); - for (const auto &context : m_contexts_resources) { - status = 
Control::context_switch_set_context_info(m_vdma_device, context.get_controls()); + // Only send controls to FW in case of control action list builder + if (ActionListBufferBuilder::Type::CONTROL == get_action_list_buffer_builder()->get_builder_type()) { + const auto control_action_list = std::static_pointer_cast( + get_action_list_buffer_builder()); + status = Control::context_switch_set_context_info(m_vdma_device, control_action_list->get_controls()); CHECK_SUCCESS(status); } @@ -619,11 +657,21 @@ hailo_status ResourcesManager::configure() hailo_status ResourcesManager::enable_state_machine(uint16_t dynamic_batch_size, uint16_t batch_count) { - return Control::enable_core_op(m_vdma_device, m_core_op_index, dynamic_batch_size, batch_count); + CHECK_SUCCESS(Control::enable_core_op(m_vdma_device, m_core_op_index, dynamic_batch_size, batch_count)); + // Enable over enable is possible (batch switch in the same NG), so there is no need to verify the state. + set_is_activated(true); + + return HAILO_SUCCESS; } hailo_status ResourcesManager::reset_state_machine() { + if (!get_is_activated()) { + return HAILO_SUCCESS; + } + + set_is_activated(false); + auto status = Control::reset_context_switch_state_machine(m_vdma_device); CHECK_SUCCESS(status); @@ -639,6 +687,8 @@ hailo_status ResourcesManager::reset_state_machine() hailo_status ResourcesManager::start_vdma_interrupts_dispatcher() { + CHECK(get_is_activated(), HAILO_INTERNAL_FAILURE, "Cannot call start_vdma_interrupts_dispatcher when core-op already deactivated"); + auto interrupts_dispatcher = m_vdma_device.get_vdma_interrupts_dispatcher(); CHECK_EXPECTED_AS_STATUS(interrupts_dispatcher); @@ -656,9 +706,31 @@ hailo_status ResourcesManager::start_vdma_interrupts_dispatcher() hailo_status ResourcesManager::stop_vdma_interrupts_dispatcher() { - auto interrupts_dispatcher = m_vdma_device.get_vdma_interrupts_dispatcher(); - CHECK_EXPECTED_AS_STATUS(interrupts_dispatcher); - return interrupts_dispatcher->get().stop(); + if (!get_is_activated()) { + return HAILO_SUCCESS; + } + + TRY(auto interrupts_dispatcher, m_vdma_device.get_vdma_interrupts_dispatcher()); + return interrupts_dispatcher.get().stop(); +} + +hailo_status ResourcesManager::start_vdma_transfer_launcher() +{ + CHECK(get_is_activated(), HAILO_INTERNAL_FAILURE, "Cannot call start_vdma_transfer_launcher when core-op already deactivated"); + TRY(auto vdma_transfer_launcher, m_vdma_device.get_vdma_transfer_launcher()); + vdma_transfer_launcher.get().start(); + return HAILO_SUCCESS; +} + +hailo_status ResourcesManager::stop_vdma_transfer_launcher() +{ + if (!get_is_activated()) { + return HAILO_SUCCESS; + } + + TRY(auto vdma_transfer_launcher, m_vdma_device.get_vdma_transfer_launcher()); + vdma_transfer_launcher.get().stop(); + return HAILO_SUCCESS; } Expected ResourcesManager::program_desc_for_hw_only_flow(std::shared_ptr desc_list, @@ -668,7 +740,7 @@ Expected ResourcesManager::program_desc_for_hw_only_flow(std::shared_p for (uint16_t batch_index = 0; batch_index < batch_count; batch_index++) { for (uint16_t transfer_index = 0; transfer_index < dynamic_batch_size; transfer_index++) { const auto last_desc_interrupts_domain = ((dynamic_batch_size - 1) == transfer_index) ? 
- vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE; + InterruptsDomain::DEVICE : InterruptsDomain::NONE; auto desc_count_local = desc_list->program_last_descriptor(single_transfer_size, last_desc_interrupts_domain, acc_desc_offset); CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. Given max_batch_size is too big."); @@ -698,8 +770,9 @@ Expected> ResourcesManager::create_mapped_b CHECK_EXPECTED(mapped_buffer); m_hw_only_boundary_buffers.emplace_back(mapped_buffer.release()); - uint32_t STARTING_DESC = 0; - auto status = desc_list->configure_to_use_buffer(*m_hw_only_boundary_buffers.back(), boundary_channel_ptr->get_channel_id(), STARTING_DESC); + static const auto DEFAULT_BUFFER_OFFSET = 0; + auto status = desc_list->configure_to_use_buffer(*m_hw_only_boundary_buffers.back(), + m_hw_only_boundary_buffers.back()->size(), DEFAULT_BUFFER_OFFSET, boundary_channel_ptr->get_channel_id()); CHECK_SUCCESS_AS_EXPECTED(status); auto desc_programed = program_desc_for_hw_only_flow(desc_list, single_transfer_size, dynamic_batch_size, batch_count); @@ -842,4 +915,52 @@ Expected ResourcesManager::run_hw_only_infer() fw_infer_results.infer_cycles); } +hailo_status ResourcesManager::fill_internal_buffers_info() +{ + for (const auto &context_metadata : m_core_op_metadata->dynamic_contexts()) { + for (const auto &layer_info : context_metadata.get_ddr_output_layers()) { + auto status = m_internal_buffer_manager->add_layer_buffer_info(layer_info); + CHECK_SUCCESS(status); + } + for (const auto &layer_info : context_metadata.get_inter_context_input_layers()) { + auto status = m_internal_buffer_manager->add_layer_buffer_info(layer_info); + CHECK_SUCCESS(status); + } + } + + auto status = m_internal_buffer_manager->plan_and_execute(InternalBufferPlanner::Type::SINGLE_BUFFER_PER_BUFFER_TYPE, + m_core_op_metadata->dynamic_contexts().size()); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +bool ResourcesManager::should_use_ddr_action_list(size_t num_contexts, HailoRTDriver::DmaType dma_type) +{ + // Only allow env variable to affect in case of DmaType DRAM + if ((HailoRTDriver::DmaType::DRAM == dma_type) && ((MAX_NUM_CONTEXTS_FOR_CONTROL_BUILDER < num_contexts) + || (is_env_variable_on(DDR_ACTION_LIST_ENV_VAR, DDR_ACTION_LIST_ENV_VAR_VALUE, sizeof(DDR_ACTION_LIST_ENV_VAR_VALUE))))) { + return true; + } + return false; +} + +Expected> ResourcesManager::create_action_list_buffer_builder( + size_t num_dynamic_contexts, HailoRTDriver &driver) +{ + static const auto total_num_contexts = CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS + + num_dynamic_contexts; + + if (should_use_ddr_action_list(total_num_contexts, driver.dma_type())) { + auto ddr_action_list_buffer_builder = DDRActionListBufferBuilder::create(total_num_contexts, driver); + CHECK_EXPECTED(ddr_action_list_buffer_builder); + return std::static_pointer_cast(ddr_action_list_buffer_builder.release()); + } else { + auto control_action_list_buffer_builder = ControlActionListBufferBuilder::create(); + CHECK_EXPECTED(control_action_list_buffer_builder); + return std::static_pointer_cast(control_action_list_buffer_builder.release()); + } + +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp index ea8f0d14..9b892460 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp +++ 
b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp @@ -31,17 +31,16 @@ #include "core_op/resource_manager/intermediate_buffer.hpp" #include "core_op/resource_manager/config_buffer.hpp" #include "core_op/resource_manager/channel_allocator.hpp" -#include "core_op/resource_manager/context_switch_buffer_builder.hpp" +#include "core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp" +#include "core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp" #include "device_common/control_protocol.hpp" #include "vdma/channel/boundary_channel.hpp" #include "vdma/pcie/pcie_device.hpp" - +#include "internal_buffer_manager.hpp" namespace hailort { -#define DEFAULT_ACTUAL_BATCH_SIZE (1) - struct EdgeLayer { LayerInfo layer_info; @@ -84,11 +83,10 @@ struct DdrChannelsInfo class ContextResources final { public: - static Expected create(HailoRTDriver &driver, CONTROL_PROTOCOL__context_switch_context_type_t context_type, - const std::vector &config_channels_ids, const ConfigBufferInfoMap &config_buffer_infos); - - const std::vector &get_controls() const; - ContextSwitchBufferBuilder &builder(); + static Expected create(HailoRTDriver &driver, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, const uint16_t context_index, + const std::vector &config_channels_ids, const ConfigBufferInfoMap &config_buffer_infos, + std::shared_ptr internal_buffer_manager); hailo_status add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const SupportedFeatures &supported_features); @@ -110,21 +108,26 @@ class ContextResources final { const SupportedFeatures &supported_features); std::vector &get_config_buffers(); + CONTROL_PROTOCOL__context_switch_context_type_t get_context_type() const { + return m_context_type; + } private: ContextResources(HailoRTDriver &driver, CONTROL_PROTOCOL__context_switch_context_type_t context_type, - std::vector &&config_buffers) : + std::vector &&config_buffers, std::shared_ptr internal_buffer_manager) : m_driver(std::ref(driver)), - m_builder(context_type), - m_config_buffers(std::move(config_buffers)) + m_context_type(context_type), + m_config_buffers(std::move(config_buffers)), + m_internal_buffer_manager(std::move(internal_buffer_manager)) {} std::reference_wrapper m_driver; - ContextSwitchBufferBuilder m_builder; + CONTROL_PROTOCOL__context_switch_context_type_t m_context_type; std::vector m_config_buffers; std::vector m_edge_layers; std::vector m_ddr_channels_infos; + std::shared_ptr m_internal_buffer_manager; }; class ResourcesManager final @@ -134,23 +137,24 @@ class ResourcesManager final const ConfigureNetworkParams &config_params, std::shared_ptr core_op_metadata, uint8_t core_op_index); - // TODO: HRT-9432 needs to call stop_vdma_interrupts_dispatcher and any other resource on dtor. + // TODO: HRT-9432 needs to call stop_vdma_interrupts_dispatcher and any other resource on dtor. 
~ResourcesManager() = default; ResourcesManager(const ResourcesManager &other) = delete; ResourcesManager &operator=(const ResourcesManager &other) = delete; ResourcesManager &operator=(ResourcesManager &&other) = delete; ResourcesManager(ResourcesManager &&other) noexcept; - ExpectedRef create_intermediate_buffer(uint32_t transfer_size, uint16_t batch_size, - uint8_t src_stream_index, uint8_t src_context_index, vdma::ChannelId d2h_channel_id, - IntermediateBuffer::StreamingType streaming_type); + ExpectedRef create_intermediate_buffer( + uint32_t transfer_size, uint16_t batch_size, uint8_t src_stream_index, uint16_t src_context_index, + vdma::ChannelId d2h_channel_id, IntermediateBuffer::StreamingType streaming_type); ExpectedRef get_intermediate_buffer(const IntermediateBufferKey &key); hailo_status create_boundary_vdma_channel(const LayerInfo &layer_info); Expected get_control_core_op_header(); - Expected> add_new_context(CONTROL_PROTOCOL__context_switch_context_type_t type, - const ConfigBufferInfoMap &config_info={}); + Expected> add_new_context( + CONTROL_PROTOCOL__context_switch_context_type_t context_type, + const uint16_t context_index, const ConfigBufferInfoMap &config_info={}); const SupportedFeatures &get_supported_features() const { @@ -181,16 +185,23 @@ class ResourcesManager final return m_boundary_channels; } + std::shared_ptr& get_action_list_buffer_builder() + { + return m_action_list_buffer_builder; + } + Expected get_default_streams_interface(); Expected read_intermediate_buffer(const IntermediateBufferKey &key); hailo_status configure(); - hailo_status enable_state_machine(uint16_t dynamic_batch_size, + hailo_status enable_state_machine(uint16_t dynamic_batch_size, uint16_t batch_count = CONTROL_PROTOCOL__INIFINITE_BATCH_COUNT); hailo_status reset_state_machine(); hailo_status start_vdma_interrupts_dispatcher(); hailo_status stop_vdma_interrupts_dispatcher(); + hailo_status start_vdma_transfer_launcher(); + hailo_status stop_vdma_transfer_launcher(); Expected get_network_batch_size(const std::string &network_name) const; Expected get_boundary_vdma_channel_by_stream_name(const std::string &stream_name); Expected> get_boundary_vdma_channel_by_stream_name(const std::string &stream_name) const; @@ -207,6 +218,24 @@ class ResourcesManager final size_t single_frame_transfer_size, uint32_t infer_cycles); hailo_status set_hw_infer_done_notification(std::condition_variable &infer_done_cond); Expected run_hw_only_infer(); + hailo_status fill_internal_buffers_info(); + static bool should_use_ddr_action_list(size_t num_contexts, HailoRTDriver::DmaType dma_type); + static Expected> create_action_list_buffer_builder( + size_t num_dynamic_contexts, HailoRTDriver &driver); + bool get_can_fast_batch_switch() + { + return m_core_op_metadata->get_can_fast_batch_switch(); + } + + void set_is_activated(bool is_activated) + { + m_is_activated = is_activated; + } + + bool get_is_activated() const + { + return m_is_activated; + } private: hailo_status fill_infer_features(CONTROL_PROTOCOL__application_header_t &app_header); @@ -224,24 +253,29 @@ class ResourcesManager final std::map m_intermediate_buffers; std::shared_ptr m_core_op_metadata; uint8_t m_core_op_index; - uint8_t m_dynamic_context_count; - uint8_t m_total_context_count; + uint16_t m_dynamic_context_count; + uint16_t m_total_context_count; const std::vector m_network_index_map; LatencyMetersMap m_latency_meters; // Latency meter per network // TODO: HRT-9429 - fast access to channel by id, using array, using engine_index and 
channel_index. std::map m_boundary_channels; bool m_is_configured; + bool m_is_activated; // Config channels ids are shared between all context. The following vector contains the channel id for each // config_stream_index. std::vector m_config_channels_ids; // Mapped buffers would be used only in hw only flow std::vector> m_hw_only_boundary_buffers; + std::shared_ptr m_internal_buffer_manager; + std::shared_ptr m_action_list_buffer_builder; ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &driver, ChannelAllocator &&channel_allocator, const ConfigureNetworkParams config_params, std::shared_ptr &&core_op_metadata, uint8_t core_op_index, const std::vector &&network_index_map, LatencyMetersMap &&latency_meters, - std::vector &&config_channels_ids); + std::vector &&config_channels_ids, + std::shared_ptr internal_buffer_manager, + std::shared_ptr &&action_list_buffer_builder); }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp index 26d8a1bd..e86c141f 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp @@ -10,7 +10,7 @@ #include "resource_manager_builder.hpp" #include "device_common/control.hpp" #include "periph_calculator.hpp" - +#include "hef/hef_internal.hpp" namespace hailort { @@ -78,7 +78,7 @@ static Expected calculate_credit_params(const CONTROL_PROTOCOL__hw_co static Expected update_layer_info(const LayerInfo &original_layer_info, const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, const bool should_optimize_credits, + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch, const bool should_optimize_credits, const bool is_periph_calculated_in_hailort, const bool is_core_hw_padding_config_in_dfc) { LayerInfo local_layer_info = original_layer_info; @@ -104,7 +104,7 @@ static Expected update_layer_info(const LayerInfo &original_layer_inf static hailo_status fill_boundary_input_layer_impl(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const HEFHwArch &hw_arch, bool should_optimize_credits) { const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); @@ -129,7 +129,7 @@ static hailo_status fill_boundary_input_layer_impl(ContextResources &context_res static hailo_status fill_boundary_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const HEFHwArch &hw_arch, bool should_optimize_credits) { if (layer_info.is_multi_planar) { for (auto &plane : layer_info.planes) { @@ -144,7 +144,7 @@ static hailo_status fill_boundary_input_layer(ContextResources &context_resource static hailo_status fill_inter_context_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const HEFHwArch &hw_arch, bool should_optimize_credits) { const auto channel_id = 
resources_manager.get_available_channel_id(to_layer_identifier(layer_info), HailoRTDriver::DmaDirection::H2D, layer_info.dma_engine_index); @@ -168,16 +168,16 @@ static hailo_status fill_inter_context_input_layer(ContextResources &context_res inter_context_buffer.get_host_buffer_info(), resources_manager.get_supported_features()); CHECK_SUCCESS(status); - LOGGER__DEBUG("Intermediate input stream {}, src_context:{}, dst_context: {}, h2d_channel {}.", - layer_info.stream_index, layer_info.context_index, layer_info.connected_context_info.context_index, - channel_id.value()); + LOGGER__DEBUG("Intermediate edge key: {}:{} src_context:{}, dst_context: {}, h2d_channel {}.", + connected_context.context_index, connected_context.stream_index, + layer_info.connected_context_info.context_index, layer_info.context_index, channel_id.value()); return HAILO_SUCCESS; } static hailo_status fill_boundary_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const HEFHwArch &hw_arch, bool should_optimize_credits) { const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); @@ -202,7 +202,7 @@ static hailo_status fill_boundary_output_layer(ContextResources &context_resourc static hailo_status fill_inter_context_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch, bool should_optimize_credits) { const auto channel_id = resources_manager.get_available_channel_id(to_layer_identifier(layer_info), HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index); @@ -214,8 +214,8 @@ static hailo_status fill_inter_context_output_layer(ContextResources &context_re CHECK_EXPECTED_AS_STATUS(network_batch_size); auto inter_context_buffer_exp = resources_manager.create_intermediate_buffer(frame_credits_in_bytes, - network_batch_size.value(), layer_info.stream_index, layer_info.context_index, channel_id.value(), - IntermediateBuffer::StreamingType::BURST); + network_batch_size.value(), layer_info.stream_index, layer_info.context_index, + channel_id.value(), IntermediateBuffer::StreamingType::BURST); CHECK_EXPECTED_AS_STATUS(inter_context_buffer_exp); auto &inter_context_buffer = inter_context_buffer_exp->get(); @@ -236,7 +236,7 @@ static hailo_status fill_inter_context_output_layer(ContextResources &context_re static hailo_status fill_ddr_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch) + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch) { CHECK(resources_manager.get_supported_features().padded_ddr_buffers, HAILO_INVALID_HEF, "Failed opening non-compatible HEF that uses the following deprecated features: host-managed DDR buffers." 
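The header changes above expose the new action-list indirection: ContextResources no longer owns a ContextSwitchBufferBuilder; instead, every context funnels its serialized actions through one ActionListBufferBuilder held by the ResourcesManager, whose concrete backend is chosen once per core-op. The following is a minimal, self-contained sketch of that dispatch pattern; every name and signature in it is a simplified stand-in for illustration, not the real HailoRT declaration.

```cpp
// Illustrative sketch only -- simplified stand-ins, not the real HailoRT types.
#include <cstdio>
#include <memory>
#include <string>
#include <vector>

class ActionListBufferBuilder {
public:
    enum class Type { CONTROL, DDR };

    explicit ActionListBufferBuilder(Type type) : m_type(type) {}
    virtual ~ActionListBufferBuilder() = default;
    virtual void write_action(const std::string &serialized_action) = 0;
    Type get_builder_type() const { return m_type; }

private:
    const Type m_type;
};

// Backend 1: accumulate serialized actions into control messages for the FW.
class ControlActionListBufferBuilder final : public ActionListBufferBuilder {
public:
    ControlActionListBufferBuilder() : ActionListBufferBuilder(Type::CONTROL) {}
    void write_action(const std::string &serialized_action) override {
        m_controls.push_back(serialized_action); // the real builder also chunks by max control size
    }
    const std::vector<std::string> &get_controls() const { return m_controls; }

private:
    std::vector<std::string> m_controls;
};

// Backend 2: write actions directly into a DDR buffer that the FW reads itself.
class DDRActionListBufferBuilder final : public ActionListBufferBuilder {
public:
    DDRActionListBufferBuilder() : ActionListBufferBuilder(Type::DDR) {}
    void write_action(const std::string &serialized_action) override {
        m_ddr_buffer += serialized_action; // the real builder targets a mapped memory region
    }

private:
    std::string m_ddr_buffer;
};

// Pick the backend once, e.g. when the context count exceeds what controls can carry.
std::shared_ptr<ActionListBufferBuilder> create_builder(size_t num_contexts, size_t max_for_control) {
    if (num_contexts > max_for_control) {
        return std::make_shared<DDRActionListBufferBuilder>();
    }
    return std::make_shared<ControlActionListBufferBuilder>();
}

int main() {
    auto builder = create_builder(/*num_contexts=*/1200, /*max_for_control=*/64);
    builder->write_action("enable_lcu");
    // Only the control backend has accumulated messages that must be sent explicitly:
    if (ActionListBufferBuilder::Type::CONTROL == builder->get_builder_type()) {
        auto control = std::static_pointer_cast<ControlActionListBufferBuilder>(builder);
        std::printf("sending %zu control message(s) to FW\n", control->get_controls().size());
    }
    return 0;
}
```

This mirrors the flow in the diff: write_action_list() stays backend-agnostic, and only the final send step in ResourcesManager::configure() down-casts to the control backend to fetch its accumulated messages.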
@@ -299,7 +299,7 @@ static hailo_status fill_ddr_output_layer(ContextResources &context_resources, } static hailo_status fill_ddr_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, - const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch) + const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch) { auto connected_stream_index = layer_info.connected_context_info.stream_index; auto ddr_info = context_resources.get_ddr_channels_info(connected_stream_index); @@ -355,7 +355,7 @@ static hailo_status add_ddr_buffers_info(std::vector &config_resources, const bool support_pre_fetch, std::vector &processed_configuration_actions) { assert(ContextSwitchConfigAction::Type::WriteDataCcw == configuration_action->get_type()); - const auto &write_ccw_action = *static_cast(configuration_action.get()); + auto &write_ccw_action = *static_cast(configuration_action.get()); const auto config_stream_index = write_ccw_action.config_stream_index(); assert(config_stream_index < config_resources.size()); - auto status = write_ccw_to_buffer(config_resources[config_stream_index], write_ccw_action, support_pre_fetch); + auto status = write_ccw_action.write_to_config_buffer(config_resources[config_stream_index], support_pre_fetch); CHECK_SUCCESS(status); status = push_fetch_config_actions(config_resources[config_stream_index], config_stream_index, @@ -589,13 +569,6 @@ static hailo_status proccess_write_ccw_action(const ContextSwitchConfigActionPtr return HAILO_SUCCESS; } -// TODO HRT-10073: change to supported features list -static bool is_hailo1x_device_type(const hailo_device_architecture_t dev_arch) -{ - // Compare with HAILO1X device archs - return (HAILO_ARCH_HAILO15H == dev_arch) || (HAILO_ARCH_HAILO15M == dev_arch) || (HAILO_ARCH_PLUTO == dev_arch); -} - static Expected find_dummy_stream(const LayerInfo &layer_info, const ContextResources &context_resources, const bool is_null_shmifo_supported) { @@ -610,9 +583,9 @@ static Expected find_dummy_stream(const LayerInfo &layer_info, const Co } } -static hailo_status add_change_vdma_to_stream_mapping_impl(const ProtoHEFHwArch &hw_arch, +static hailo_status add_change_vdma_to_stream_mapping_impl(const HEFHwArch &hw_arch, const LayerInfo &layer_info, const ResourcesManager &resources_manager, - ContextResources &context_resources, uint8_t context_index, + ContextResources &context_resources, uint16_t context_index, std::vector &processed_configuration_actions) { auto vdma_channel = resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name); @@ -623,7 +596,7 @@ static hailo_status add_change_vdma_to_stream_mapping_impl(const ProtoHEFHwArch uint8_t stream_index = layer_info.stream_index; if (is_dummy_stream) { auto dummy_stream_index = find_dummy_stream(layer_info, context_resources, - is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch))); + HailoRTCommon::is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch))); CHECK_EXPECTED_AS_STATUS(dummy_stream_index); stream_index = *dummy_stream_index; } @@ -635,9 +608,9 @@ static hailo_status add_change_vdma_to_stream_mapping_impl(const ProtoHEFHwArch return HAILO_SUCCESS; } -static hailo_status add_change_vdma_to_stream_mapping(const ProtoHEFHwArch &hw_arch, +static hailo_status add_change_vdma_to_stream_mapping(const HEFHwArch &hw_arch, const CoreOpMetadata &core_op_metadata, const ResourcesManager &resources_manager, - ContextResources 
&context_resources, uint8_t context_index, + ContextResources &context_resources, uint16_t context_index, std::vector &processed_configuration_actions) { for (const auto &layer_info : core_op_metadata.get_all_layer_infos()) { @@ -726,7 +699,7 @@ static hailo_status push_edge_layer_activation_actions( return HAILO_SUCCESS; } -static hailo_status proccess_trigger_new_data_input_action(const ProtoHEFHwArch &hw_arch, +static hailo_status proccess_trigger_new_data_input_action(const HEFHwArch &hw_arch, const ContextSwitchConfigActionPtr &configuration_action, uint32_t trigger_new_data_from_input_group_start, uint32_t trigger_new_data_from_input_group_end, @@ -734,7 +707,7 @@ static hailo_status proccess_trigger_new_data_input_action(const ProtoHEFHwArch const CoreOpMetadata &core_op_metadata, const ResourcesManager &resources_manager, ContextResources &context_resources, - uint8_t context_index, + uint16_t context_index, std::vector &processed_configuration_actions, bool is_single_context) { const bool PUSH_ALL_EDGE_LAYERS = false; @@ -782,10 +755,9 @@ static hailo_status proccess_trigger_new_data_input_action(const ProtoHEFHwArch static hailo_status add_fetch_config_actions(std::vector &configuration_actions, std::vector &config_resources, bool support_pre_fetch) { - std::vector processed_configuration_actions; for (uint32_t action_index = 0; action_index < configuration_actions.size(); action_index++) { - const auto &configuration_action = configuration_actions[action_index]; + auto &configuration_action = configuration_actions[action_index]; if (ContextSwitchConfigAction::Type::WriteDataCcw == configuration_action->get_type()) { auto status = proccess_write_ccw_action(configuration_action, config_resources, support_pre_fetch, processed_configuration_actions); @@ -840,9 +812,9 @@ static hailo_status add_config_channel_activation_actions(std::vector &configuration_actions, const CoreOpMetadata &core_op_metadata, - const ResourcesManager &resources_manager, ContextResources &context_resources, uint8_t context_index, + const ResourcesManager &resources_manager, ContextResources &context_resources, uint16_t context_index, bool is_single_context) { const auto repeated_indexes = get_repreated_actions_boundary_indices(configuration_actions); @@ -915,15 +887,21 @@ static hailo_status handle_repeated_actions(std::vector &actions) +static hailo_status write_action_list(const ContextResources & context_resources, + std::shared_ptr &builder, const std::vector &actions) { + // Mark first action buffer of context to know when new context is starting (needed for dynamic contexts) + bool is_first_action_buffer_of_context = true; for (const auto &action : actions) { auto action_buffers = action->serialize(context_resources); CHECK_EXPECTED_AS_STATUS(action_buffers); for (auto &action_buffer : action_buffers.value()) { - builder.write_action(MemoryView(action_buffer)); + const bool last_action_buffer_in_context = (action_buffer == *(action_buffers.value().end() - 1)) && + (action == *(actions.end() - 1)); + builder->write_action(MemoryView(action_buffer), context_resources.get_context_type(), + is_first_action_buffer_of_context, last_action_buffer_in_context); + is_first_action_buffer_of_context = false; } } @@ -952,9 +930,9 @@ static hailo_status add_edge_layer_end_of_context_actions(const ContextResources return HAILO_SUCCESS; } -static hailo_status fill_context_recipes_for_multi_context(const ProtoHEFHwArch &hw_arch, +static hailo_status fill_context_recipes_for_multi_context(const HEFHwArch &hw_arch, 
ContextResources &context_resources, ResourcesManager &resources_manager, - uint8_t context_index, const CoreOpMetadata &core_op_metadata, const ContextMetadata &context_metadata, + uint16_t context_index, const CoreOpMetadata &core_op_metadata, const ContextMetadata &context_metadata, bool is_single_context) { hailo_status status = HAILO_UNINITIALIZED; @@ -966,7 +944,7 @@ static hailo_status fill_context_recipes_for_multi_context(const ProtoHEFHwArch // Parse context std::vector actions = context_metadata.get_actions(); - const auto support_pre_fetch = is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); + const auto support_pre_fetch = HailoRTCommon::is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch); CHECK_SUCCESS(status); @@ -991,7 +969,7 @@ static hailo_status fill_context_recipes_for_multi_context(const ProtoHEFHwArch status = handle_repeated_actions(actions); CHECK_SUCCESS(status); - return write_action_list(context_resources, context_resources.builder(), actions); + return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } static hailo_status create_boundary_channels(ResourcesManager &resources_manager, @@ -1013,7 +991,7 @@ static hailo_status create_boundary_channels(ResourcesManager &resources_manager static hailo_status fill_activation_config_recepies_for_multi_context( ContextResources &context_resources, ResourcesManager &resources_manager, - std::shared_ptr core_op_metadata, const ProtoHEFHwArch &hw_arch) + std::shared_ptr core_op_metadata, const HEFHwArch &hw_arch) { auto hw_consts = Control::get_hw_consts(resources_manager.get_device()); CHECK_EXPECTED_AS_STATUS(hw_consts); @@ -1045,7 +1023,7 @@ static hailo_status fill_activation_config_recepies_for_multi_context( actions.emplace_back(action.release()); } - return write_action_list(context_resources, context_resources.builder(), actions); + return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } static Expected create_switch_lcu_batch_action(const ContextSwitchConfigActionPtr action, @@ -1057,8 +1035,9 @@ static Expected create_switch_lcu_batch_action(con uint32_t kernel_done_count = 0; CHECK_AS_EXPECTED((ContextSwitchConfigAction::Type::EnableLcuDefault == action->get_type()) || + (ContextSwitchConfigAction::Type::SwitchLcuBatch == action->get_type()) || (ContextSwitchConfigAction::Type::EnableLcuNonDefault == action->get_type()), HAILO_INVALID_ARGUMENT, - "Invalid action type - must be enable lcu (default or non default), Received type {}", action->get_type()); + "Invalid action type - must be enable lcu (default or non default) or switch lcu batch, Received type {}", action->get_type()); const auto params_buffer = action->serialize_params(context_resources); CHECK_EXPECTED(params_buffer); @@ -1069,19 +1048,25 @@ static Expected create_switch_lcu_batch_action(con lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id); network_index = params->network_index; kernel_done_count = CONTEXT_SWITCH_DEFS__ENABLE_LCU_DEFAULT_KERNEL_COUNT; - } else { + } else if (ContextSwitchConfigAction::Type::EnableLcuNonDefault == action->get_type()) { const auto params = reinterpret_cast(params_buffer.value().data()); cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(params->packed_lcu_id); lcu_index = 
CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id); network_index = params->network_index; kernel_done_count = params->kernel_done_count; + } else if (ContextSwitchConfigAction::Type::SwitchLcuBatch == action->get_type()) { + const auto params = reinterpret_cast(params_buffer.value().data()); + cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(params->packed_lcu_id); + lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id); + network_index = params->network_index; + kernel_done_count = params->kernel_done_count; } return SwitchLcuBatchAction::create(cluster_index, lcu_index, network_index, kernel_done_count); } static hailo_status fill_batch_switching_context_edge_layers(ContextResources &context_resources, const CoreOpMetadata &core_op_metadata, ResourcesManager &resources_manager, - const ProtoHEFHwArch &hw_arch) + const HEFHwArch &hw_arch) { auto hw_consts = Control::get_hw_consts(resources_manager.get_device()); CHECK_EXPECTED_AS_STATUS(hw_consts); @@ -1130,7 +1115,8 @@ static hailo_status add_lcu_actions_to_batch_switch_context(ContextResources &co // In the batch switch context static const std::set ENABLE_LCU_ACTIONS = { ContextSwitchConfigAction::Type::EnableLcuDefault, - ContextSwitchConfigAction::Type::EnableLcuNonDefault + ContextSwitchConfigAction::Type::EnableLcuNonDefault, + ContextSwitchConfigAction::Type::SwitchLcuBatch }; const auto lcu_batch_switch_actions = core_op_metadata.preliminary_context().get_actions_of_type(ENABLE_LCU_ACTIONS); @@ -1161,7 +1147,7 @@ static hailo_status create_change_boundary_input_batch_actions(const ContextReso } static hailo_status add_edge_layers_actions_to_batch_switch_context(ContextResources &context_resources, const CoreOpMetadata &core_op_metadata, - ResourcesManager &resources_manager, const ProtoHEFHwArch &hw_arch, std::vector &actions) + ResourcesManager &resources_manager, const HEFHwArch &hw_arch, std::vector &actions) { auto status = fill_batch_switching_context_edge_layers(context_resources, core_op_metadata, resources_manager, hw_arch); CHECK_SUCCESS(status); @@ -1197,7 +1183,7 @@ static hailo_status add_edge_layers_actions_to_batch_switch_context(ContextResou static hailo_status fill_batch_switching_context_config_recepies_for_multi_context( ContextResources &context_resources, const CoreOpMetadata &core_op_metadata, ResourcesManager &resources_manager, - const ProtoHEFHwArch &hw_arch) + const HEFHwArch &hw_arch) { std::vector actions; @@ -1210,10 +1196,10 @@ static hailo_status fill_batch_switching_context_config_recepies_for_multi_conte status = handle_repeated_actions(actions); CHECK_SUCCESS(status); - return write_action_list(context_resources, context_resources.builder(), actions); + return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } -static hailo_status fill_preliminary_config_recepies_for_multi_context(const ProtoHEFHwArch &hw_arch, +static hailo_status fill_preliminary_config_recepies_for_multi_context(const HEFHwArch &hw_arch, ContextResources &context_resources, ResourcesManager &resources_manager, std::shared_ptr core_op_metadata, const ContextMetadata &preliminary_context, bool is_single_context) @@ -1231,7 +1217,7 @@ static hailo_status fill_preliminary_config_recepies_for_multi_context(const Pro // Parse preliminary config std::vector actions = preliminary_context.get_actions(); - const auto support_pre_fetch = is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); + const 
auto support_pre_fetch = HailoRTCommon::is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); auto status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch); CHECK_SUCCESS(status); @@ -1247,14 +1233,12 @@ static hailo_status fill_preliminary_config_recepies_for_multi_context(const Pro status = handle_repeated_actions(actions); CHECK_SUCCESS(status); - return write_action_list(context_resources, context_resources.builder(), actions); + return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } - - Expected> ResourcesManagerBuilder::build(uint8_t current_core_op_index, VdmaDevice &device, HailoRTDriver &driver, const ConfigureNetworkParams &config_params, - std::shared_ptr core_op_metadata, const ProtoHEFHwArch &hw_arch) + std::shared_ptr core_op_metadata, const HEFHwArch &hw_arch, std::shared_ptr shef_file_handle) { const auto num_contexts = core_op_metadata->dynamic_contexts().size() + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS; @@ -1278,13 +1262,24 @@ Expected> ResourcesManagerBuilder::build(uint8 auto status = create_boundary_channels(resources_manager.value(), *core_op_metadata); CHECK_SUCCESS_AS_EXPECTED(status); - auto activation_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION); + status = resources_manager->fill_internal_buffers_info(); + CHECK_SUCCESS_AS_EXPECTED(status); + + // No allocation of edge layers in the activation context. No need for context index here + auto INVALID_CONTEXT_INDEX = static_cast<uint16_t>(UINT16_MAX); + auto ACTIVATION_CONTEXT_INDEX = INVALID_CONTEXT_INDEX; + + auto activation_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION, + ACTIVATION_CONTEXT_INDEX); CHECK_EXPECTED(activation_context); status = fill_activation_config_recepies_for_multi_context(activation_context.value().get(), resources_manager.value(), core_op_metadata, hw_arch); CHECK_SUCCESS_AS_EXPECTED(status); - auto batch_switching_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_BATCH_SWITCHING); + // No allocation of edge layers in the batch switching context.
No need for context index here + auto BATCH_SWITCH_CONTEXT_INDEX = INVALID_CONTEXT_INDEX; + auto batch_switching_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_BATCH_SWITCHING, + BATCH_SWITCH_CONTEXT_INDEX); CHECK_EXPECTED(batch_switching_context); status = fill_batch_switching_context_config_recepies_for_multi_context(batch_switching_context.value().get(), *core_op_metadata, resources_manager.value(), hw_arch); @@ -1292,17 +1287,25 @@ Expected> ResourcesManagerBuilder::build(uint8 const bool is_single_context = core_op_metadata->dynamic_contexts().size() == 1; + if (nullptr != shef_file_handle) { + // We will start reading CCWs from the HEF file, so we need to open it + status = shef_file_handle->open(); + CHECK_SUCCESS_AS_EXPECTED(status); + } + + auto PRELIMINARY_CONTEXT_INDEX = static_cast<uint16_t>(0); auto preliminary_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_PRELIMINARY, - core_op_metadata->preliminary_context().config_buffers_info()); + PRELIMINARY_CONTEXT_INDEX, core_op_metadata->preliminary_context().config_buffers_info()); CHECK_EXPECTED(preliminary_context); status = fill_preliminary_config_recepies_for_multi_context(hw_arch, preliminary_context.value().get(), resources_manager.value(), core_op_metadata, core_op_metadata->preliminary_context(), is_single_context); CHECK_SUCCESS_AS_EXPECTED(status); - uint8_t context_index = 0; + uint16_t context_index = 0; + auto FIRST_DYNAMIC_CONTEXT_INDEX = 1; for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) { auto new_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC, - context_metadata.config_buffers_info()); + static_cast<uint16_t>(FIRST_DYNAMIC_CONTEXT_INDEX + context_index), context_metadata.config_buffers_info()); CHECK_EXPECTED(new_context); status = fill_context_recipes_for_multi_context(hw_arch, new_context.value().get(), resources_manager.value(), @@ -1313,6 +1316,11 @@ Expected> ResourcesManagerBuilder::build(uint8 context_index++; } + if (nullptr != shef_file_handle) { + status = shef_file_handle->close(); + CHECK_SUCCESS_AS_EXPECTED(status); + } + status = resources_manager->configure(); CHECK_SUCCESS_AS_EXPECTED(status); diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp index e596cb06..a97f9552 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp @@ -10,21 +10,20 @@ #ifndef _HAILO_RESOURCE_MANAGER_BUILDER_HPP_ #define _HAILO_RESOURCE_MANAGER_BUILDER_HPP_ -#include "hef/hef_internal.hpp" #include "core_op/resource_manager/resource_manager.hpp" namespace hailort { +class ShefFileHandle; class ResourcesManagerBuilder final { public: ResourcesManagerBuilder() = delete; - /* TODO HRT-5067 - work with hailo_device_architecture_t instead of ProtoHEFHwArch */ static Expected> build(uint8_t net_group_index, VdmaDevice &device, HailoRTDriver &driver, const ConfigureNetworkParams &config_params, - std::shared_ptr core_op, const ProtoHEFHwArch &hw_arch); + std::shared_ptr core_op, const HEFHwArch &hw_arch, std::shared_ptr shef_file_handle); }; diff --git a/hailort/libhailort/src/device_common/control.cpp b/hailort/libhailort/src/device_common/control.cpp index 1ad286ce..ae9249b3 100644 --- a/hailort/libhailort/src/device_common/control.cpp +++
b/hailort/libhailort/src/device_common/control.cpp @@ -10,7 +10,8 @@ #include "common/utils.hpp" #include "common/logger_macros.hpp" -#include "hef/hef_internal.hpp" +#include "hailo/hailort_common.hpp" +#include "hef/core_op_metadata.hpp" #include "device_common/control.hpp" #include "hw_consts.hpp" #include "utils/soc_utils/partial_cluster_reader.hpp" @@ -91,9 +92,8 @@ Expected control__parse_identify_results(CONTROL_PROTOC // Device architecture can be HAILO_ARCH_HAILO15H or HAILO_ARCH_HAILO15M - but the FW will always return HAILO_ARCH_HAILO15H // Based on a file the SCU gives us we can deduce the actual type if (HAILO_ARCH_HAILO15H == board_info.device_architecture) { - auto dev_arch_exp = PartialClusterReader::get_actual_dev_arch_from_fuse(board_info.device_architecture); - CHECK_EXPECTED(dev_arch_exp); - board_info.device_architecture = dev_arch_exp.release(); + TRY(const auto dev_arch, PartialClusterReader::get_actual_dev_arch_from_fuse(board_info.device_architecture)); + board_info.device_architecture = dev_arch; } /* Write identify results to log */ @@ -208,6 +208,57 @@ hailo_status control__parse_core_identify_results(CONTROL_PROTOCOL__core_identif return HAILO_SUCCESS; } +hailo_status log_detailed_fw_error(const Device &device, const CONTROL_PROTOCOL__status_t &fw_status, const CONTROL_PROTOCOL__OPCODE_t opcode) +{ + const char *firmware_status_text = NULL; + // Special care for user_config_examine - warning log will be printed if not loaded, since it can happen on happy-flow (e.g. no EEPROM) + if ((fw_status.major_status == CONTROL_PROTOCOL_STATUS_USER_CONFIG_EXAMINE_FAILED) && + (fw_status.minor_status == FIRMWARE_CONFIGS_STATUS_USER_CONFIG_NOT_LOADED)) { + LOGGER__WARNING("Failed to examine user config, as it is not loaded or is not supported by the device."); + } + + LOGGER__ERROR("Firmware control has failed. Major status: {:#x}, Minor status: {:#x}", + fw_status.major_status, + fw_status.minor_status); + auto common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.major_status, &firmware_status_text); + if (HAILO_COMMON_STATUS__SUCCESS == common_status) { + LOGGER__ERROR("Firmware major status: {}", firmware_status_text); + } else { + LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}", + (FIRMWARE_STATUS_t)fw_status.major_status, common_status); + } + common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.minor_status, &firmware_status_text); + if (HAILO_COMMON_STATUS__SUCCESS == common_status) { + LOGGER__ERROR("Firmware minor status: {}", firmware_status_text); + } else { + LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}", + (FIRMWARE_STATUS_t)fw_status.minor_status, common_status); + } + + if ((CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.minor_status) || + (CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.major_status)) { + auto device_arch = device.get_architecture(); + auto dev_arch_str = (device_arch) ? HailoRTCommon::get_device_arch_str(*device_arch) : "Unable to parse arch"; + LOGGER__ERROR("Opcode {} is not supported on the device." 
\ + " This error usually occurs when the control is not supported for the device arch - ({}), or not compiled to the FW", + CONTROL_PROTOCOL__get_textual_opcode(opcode), dev_arch_str); + } + + if ((CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.minor_status) || + (CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.major_status)) { + LOGGER__ERROR("Opcode {} is not supported on the current board.", CONTROL_PROTOCOL__get_textual_opcode(opcode)); + return HAILO_UNSUPPORTED_OPCODE; + } + + if ((HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.minor_status) || + (HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.major_status)) { + LOGGER__ERROR("Opcode {} is not supported", CONTROL_PROTOCOL__get_textual_opcode(opcode)); + return HAILO_UNSUPPORTED_OPCODE; + } + + return HAILO_FW_CONTROL_FAILURE; +} + hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t message_size, CONTROL_PROTOCOL__response_header_t **header, CONTROL_PROTOCOL__payload_t **payload, CONTROL_PROTOCOL__request_t *request, Device &device) @@ -215,7 +266,6 @@ hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t mes hailo_status status = HAILO_UNINITIALIZED; HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED; CONTROL_PROTOCOL__status_t fw_status = {}; - const char *firmware_status_text = NULL; /* Parse the response */ common_status = CONTROL_PROTOCOL__parse_response(message, message_size, header, payload, &fw_status); @@ -228,51 +278,12 @@ hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t mes if (HAILO_SUCCESS != status) { goto exit; } - /* Valdiate response was succesfull - both major and minor should be error free */ + /* Validate response was successful - both major and minor should be error free */ if (0 != fw_status.major_status) { - status = HAILO_FW_CONTROL_FAILURE; - LOGGER__ERROR("Firmware control has failed. Major status: {:#x}, Minor status: {:#x}", - fw_status.major_status, - fw_status.minor_status); - common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.major_status, &firmware_status_text); - if (HAILO_COMMON_STATUS__SUCCESS == common_status) { - LOGGER__ERROR("Firmware major status: {}", firmware_status_text); - } else { - LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}", - (FIRMWARE_STATUS_t)fw_status.major_status, common_status); - } - common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.minor_status, &firmware_status_text); - if (HAILO_COMMON_STATUS__SUCCESS == common_status) { - LOGGER__ERROR("Firmware minor status: {}", firmware_status_text); - } else { - LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}", - (FIRMWARE_STATUS_t)fw_status.minor_status, common_status); - } - - if ((CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.minor_status) || - (CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.major_status)) { - auto device_arch = device.get_architecture(); - auto dev_arch_str = (device_arch) ? HailoRTCommon::get_device_arch_str(*device_arch) : "Unable to parse arch"; - LOGGER__ERROR("Opcode {} is not supported on the device." 
\ - " This error usually occurs when the control is not supported for the device arch - ({}), or not compiled to the FW", - CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode)), - dev_arch_str); - } - - if ((CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.minor_status) || - (CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.major_status)) { - LOGGER__ERROR("Opcode {} is not supported on the current board.", - CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode))); - } - - if ((HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.minor_status) || - (HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.major_status)) { - status = HAILO_UNSUPPORTED_OPCODE; - LOGGER__ERROR("Opcode {} is not supported", - CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode))); - } - + status = log_detailed_fw_error(device, fw_status, + static_cast(BYTE_ORDER__ntohl(request->header.common_header.opcode))); goto exit; + } /* Validate response opcode is same as request */ @@ -2382,7 +2393,7 @@ hailo_status Control::context_switch_set_network_group_header(Device &device, } hailo_status Control::context_switch_set_context_info_chunk(Device &device, - const CONTROL_PROTOCOL__context_switch_context_info_single_control_t &context_info) + const CONTROL_PROTOCOL__context_switch_context_info_chunk_t &context_info) { hailo_status status = HAILO_UNINITIALIZED; HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED; @@ -2422,7 +2433,7 @@ hailo_status Control::context_switch_set_context_info_chunk(Device &device, } hailo_status Control::context_switch_set_context_info(Device &device, - const std::vector &context_infos) + const std::vector &context_infos) { for (const auto &context_info : context_infos) { auto status = context_switch_set_context_info_chunk(device, context_info); @@ -2543,7 +2554,7 @@ hailo_status Control::set_pause_frames(Device &device, uint8_t rx_pause_frames_e } hailo_status Control::download_context_action_list_chunk(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, bool *is_action_list_end, uint32_t *batch_counter) { @@ -2614,7 +2625,7 @@ hailo_status Control::download_context_action_list_chunk(Device &device, uint32_ } hailo_status Control::download_context_action_list(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, size_t action_list_max_size, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, uint32_t *batch_counter) { hailo_status status = HAILO_UNINITIALIZED; @@ -3073,14 +3084,10 @@ Expected Control::get_partial_clusters_layout_bitmap(Device &device) return std::stoi(std::string(force_layout_env)); } - auto dev_arch_exp = device.get_architecture(); - CHECK_EXPECTED(dev_arch_exp); - const auto dev_arch = dev_arch_exp.release(); + TRY(const auto dev_arch, device.get_architecture()); // In Both cases of Hailo15H and Hailo15M read fuse file (If no file 
found will return default value of all clusters) if ((HAILO_ARCH_HAILO15H == dev_arch) || (HAILO_ARCH_HAILO15M == dev_arch)) { - auto bitmap_exp = PartialClusterReader::get_partial_clusters_layout_bitmap(dev_arch); - CHECK_EXPECTED(bitmap_exp); - const auto bitmap = bitmap_exp.release(); + TRY(const auto bitmap, PartialClusterReader::get_partial_clusters_layout_bitmap(dev_arch)); if (PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15_DEFAULT == bitmap) { return Expected(PARTIAL_CLUSTERS_LAYOUT_IGNORE); } else { diff --git a/hailort/libhailort/src/device_common/control.hpp b/hailort/libhailort/src/device_common/control.hpp index 01a180fc..6b0ed3dc 100644 --- a/hailort/libhailort/src/device_common/control.hpp +++ b/hailort/libhailort/src/device_common/control.hpp @@ -280,7 +280,7 @@ class Control final */ // TODO: fix static hailo_status download_context_action_list(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, uint32_t *batch_counter); @@ -343,7 +343,7 @@ class Control final static hailo_status write_memory(Device &device, uint32_t address, const uint8_t *data, uint32_t data_length); static hailo_status read_memory(Device &device, uint32_t address, uint8_t *data, uint32_t data_length); static hailo_status context_switch_set_context_info(Device &device, - const std::vector &context_infos); + const std::vector &context_infos); static hailo_status context_switch_set_network_group_header(Device &device, const CONTROL_PROTOCOL__application_header_t &network_group_header); static hailo_status wd_enable(Device &device, uint8_t cpu_id, bool should_enable); @@ -398,11 +398,11 @@ class Control final uint8_t *buffer, uint32_t *actual_read_data_length); static hailo_status write_user_config_chunk(Device &device, uint32_t offset, const uint8_t *data, uint32_t chunk_size); static hailo_status download_context_action_list_chunk(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, uint16_t action_list_offset, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, bool *is_action_list_end, uint32_t *batch_counter); static hailo_status context_switch_set_context_info_chunk(Device &device, - const CONTROL_PROTOCOL__context_switch_context_info_single_control_t &context_info); + const CONTROL_PROTOCOL__context_switch_context_info_chunk_t &context_info); static hailo_status change_context_switch_status(Device &device, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count); diff --git a/hailort/libhailort/src/device_common/control_protocol.cpp b/hailort/libhailort/src/device_common/control_protocol.cpp index aad1a2b7..af0ad08a 100644 --- a/hailort/libhailort/src/device_common/control_protocol.cpp +++ b/hailort/libhailort/src/device_common/control_protocol.cpp @@ -59,6 +59,10 @@ const char *CONTROL_PROTOCOL__get_textual_opcode(CONTROL_PROTOCOL__OPCODE_t opco #define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (5) +#define CHECK_NOT_NULL_COMMON_STATUS(arg, status) _CHECK(nullptr != (arg), (status), "CHECK_NOT_NULL for {} failed", 
#arg) +#define CHECK_COMMON_STATUS(cond, ret_val, ...) \ + _CHECK((cond), (ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__)) + /* Functions declarations */ HAILO_COMMON_STATUS_t control_protocol__parse_message(uint8_t *message, uint32_t message_size, @@ -252,11 +256,11 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_fw_logger_request(CONTROL_PROTO { size_t local_request_size = 0; - CHECK(request != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK(request_size != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_COMMON_STATUS(request != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_COMMON_STATUS(request_size != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK(level <= (uint8_t) CONTROL_PROTOCOL__FW_MAX_LOGGER_LEVEL, HAILO_STATUS__CONTROL_PROTOCOL__INVALID_ARGUMENT); - CHECK(interface_mask <= CONTROL_PROTOCOL__FW_MAX_LOGGER_INTERFACE, HAILO_STATUS__CONTROL_PROTOCOL__INVALID_ARGUMENT); + CHECK_COMMON_STATUS(level <= (uint8_t) CONTROL_PROTOCOL__FW_MAX_LOGGER_LEVEL, HAILO_STATUS__CONTROL_PROTOCOL__INVALID_ARGUMENT); + CHECK_COMMON_STATUS(interface_mask <= CONTROL_PROTOCOL__FW_MAX_LOGGER_INTERFACE, HAILO_STATUS__CONTROL_PROTOCOL__INVALID_ARGUMENT); static_assert((uint32_t) FW_LOGGER_LEVEL_TRACE == (uint32_t) HAILO_FW_LOGGER_LEVEL_TRACE, "mismatch in FW_LOGGER_LEVEL_TRACE and HAILO_FW_LOGGER_LEVEL_TRACE"); @@ -294,8 +298,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_throttling_state_request(CONTRO { size_t local_request_size = 0; - CHECK_NOT_NULL(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK_NOT_NULL(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__set_throttling_state_request_t); @@ -318,8 +322,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_overcurrent_state_request(CONTR { size_t local_request_size = 0; - CHECK_NOT_NULL(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK_NOT_NULL(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__set_overcurrent_state_request_t); @@ -347,8 +351,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_clock_freq_request(CONTROL_PROT { size_t local_request_size = 0; - CHECK(request != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK(request_size != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_COMMON_STATUS(request != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_COMMON_STATUS(request_size != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__set_clock_freq_request_t); @@ -1663,7 +1667,7 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_network_group_he HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_context_info_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, 
uint32_t sequence, - const CONTROL_PROTOCOL__context_switch_context_info_single_control_t *context_info) + const CONTROL_PROTOCOL__context_switch_context_info_chunk_t *context_info) { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; size_t local_request_size = 0; @@ -1678,17 +1682,17 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_context_info_req sizeof(CONTROL_PROTOCOL__context_switch_set_context_info_request_t) + context_info->context_network_data_length; control_protocol__pack_request_header(request, sequence, HAILO_CONTROL_OPCODE_CONTEXT_SWITCH_SET_CONTEXT_INFO, 4); - /* is_first_control_per_context */ - request->parameters.context_switch_set_context_info_request.is_first_control_per_context_length = - BYTE_ORDER__htonl(sizeof(request->parameters.context_switch_set_context_info_request.is_first_control_per_context)); - request->parameters.context_switch_set_context_info_request.is_first_control_per_context = - context_info->is_first_control_per_context; + /* is_first_chunk_per_context */ + request->parameters.context_switch_set_context_info_request.is_first_chunk_per_context_length = + BYTE_ORDER__htonl(sizeof(request->parameters.context_switch_set_context_info_request.is_first_chunk_per_context)); + request->parameters.context_switch_set_context_info_request.is_first_chunk_per_context = + context_info->is_first_chunk_per_context; - /* is_last_control_per_context */ - request->parameters.context_switch_set_context_info_request.is_last_control_per_context_length = - BYTE_ORDER__htonl(sizeof(request->parameters.context_switch_set_context_info_request.is_last_control_per_context)); - request->parameters.context_switch_set_context_info_request.is_last_control_per_context = - context_info->is_last_control_per_context; + /* is_last_chunk_per_context */ + request->parameters.context_switch_set_context_info_request.is_last_chunk_per_context_length = + BYTE_ORDER__htonl(sizeof(request->parameters.context_switch_set_context_info_request.is_last_chunk_per_context)); + request->parameters.context_switch_set_context_info_request.is_last_chunk_per_context = + context_info->is_last_chunk_per_context; /* context_type */ request->parameters.context_switch_set_context_info_request.context_type_length = @@ -1751,8 +1755,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_pause_frames_request(CONTROL_PR size_t *request_size, uint32_t sequence, uint8_t rx_pause_frames_enable) { - CHECK_NOT_NULL(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK_NOT_NULL(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ size_t local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__set_pause_frames_t); @@ -1770,7 +1774,7 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_pause_frames_request(CONTROL_PR HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_download_context_action_list_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, uint16_t action_list_offset) + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset) { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; size_t local_request_size = 0; 
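The renames in this hunk (is_first/last_control_per_context becoming is_first/last_chunk_per_context, operating on the _chunk_t struct) make the transport unit explicit: one context's serialized action list may span several chunks, each flagged so the receiver can tell where a context starts and ends. Below is a minimal standalone sketch of that chunking contract; the struct, helper, and sizes are illustrative stand-ins, not the real protocol definitions.

```cpp
// Hypothetical sketch: split one context's action list into ordered chunks,
// flagging the first and last chunk so the receiver can reassemble them.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct ContextInfoChunk {
    bool is_first_chunk_per_context;
    bool is_last_chunk_per_context;
    std::vector<uint8_t> data;
};

std::vector<ContextInfoChunk> split_context(const std::vector<uint8_t> &action_list,
                                            size_t max_chunk_size) {
    std::vector<ContextInfoChunk> chunks;
    size_t offset = 0;
    do {
        const size_t size = std::min(max_chunk_size, action_list.size() - offset);
        ContextInfoChunk chunk{};
        chunk.is_first_chunk_per_context = (0 == offset);
        chunk.is_last_chunk_per_context = (offset + size == action_list.size());
        chunk.data.assign(action_list.begin() + offset, action_list.begin() + offset + size);
        chunks.push_back(std::move(chunk));
        offset += size;
    } while (offset < action_list.size());
    return chunks;
}

int main() {
    std::vector<uint8_t> action_list(2500, 0xAB); // stand-in for serialized actions
    const size_t MAX_CHUNK = 1024;                // illustrative, not the FW limit
    for (const auto &chunk : split_context(action_list, MAX_CHUNK)) {
        std::printf("chunk: %zu bytes, first=%d last=%d\n",
                    chunk.data.size(), chunk.is_first_chunk_per_context,
                    chunk.is_last_chunk_per_context);
    }
    return 0;
}
```

Using a do/while keeps the degenerate case well-formed: an empty action list still yields exactly one chunk with both flags set, so the receiver never has to special-case a context with no data.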
@@ -2325,8 +2329,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_run_bist_test_request( { size_t local_request_size = 0; - CHECK_NOT_NULL(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK_NOT_NULL(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + diff --git a/hailort/libhailort/src/device_common/control_protocol.hpp b/hailort/libhailort/src/device_common/control_protocol.hpp index ae0b9674..5fb914fd 100644 --- a/hailort/libhailort/src/device_common/control_protocol.hpp +++ b/hailort/libhailort/src/device_common/control_protocol.hpp @@ -97,12 +97,12 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_network_group_he const CONTROL_PROTOCOL__application_header_t *network_group_header); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_context_info_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, - const CONTROL_PROTOCOL__context_switch_context_info_single_control_t *context_info); + const CONTROL_PROTOCOL__context_switch_context_info_chunk_t *context_info); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_idle_time_set_measuremment_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint8_t measurement_enable); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_idle_time_get_measuremment_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_download_context_action_list_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, uint16_t action_list_offset); + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_context_switch_status_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, uint8_t application_index, diff --git a/hailort/libhailort/src/device_common/d2h_events_parser.cpp b/hailort/libhailort/src/device_common/d2h_events_parser.cpp index 5ac599fa..e60f7c36 100644 --- a/hailort/libhailort/src/device_common/d2h_events_parser.cpp +++ b/hailort/libhailort/src/device_common/d2h_events_parser.cpp @@ -30,6 +30,9 @@ using namespace hailort; /* Function prototype for control operations */ typedef HAILO_COMMON_STATUS_t (*firmware_notifications_parser_t) (D2H_EVENT_MESSAGE_t *d2h_notification_message); +#define CHECK_COMMON_STATUS(cond, ret_val, ...) 
\ + _CHECK((cond), (ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__)) + /********************************************************************** * Private Declarations **********************************************************************/ @@ -328,11 +331,11 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_cpu_ecc_error_noti { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; - CHECK(D2H_EVENT_HEALTH_MONITOR_CPU_ECC_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, + CHECK_COMMON_STATUS(D2H_EVENT_HEALTH_MONITOR_CPU_ECC_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_COUNT, "d2h event invalid parameter count: {}", d2h_notification_message->header.parameter_count); - CHECK(sizeof(d2h_notification_message->message_parameters.health_monitor_cpu_ecc_event) == d2h_notification_message->header.payload_length, + CHECK_COMMON_STATUS(sizeof(d2h_notification_message->message_parameters.health_monitor_cpu_ecc_event) == d2h_notification_message->header.payload_length, HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_LENGTH, "d2h event invalid payload_length: {}", d2h_notification_message->header.payload_length); @@ -374,11 +377,11 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_breakpoint_reached { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; - CHECK(D2H_EVENT_CONTEXT_SWITCH_BREAKPOINT_REACHED_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, + CHECK_COMMON_STATUS(D2H_EVENT_CONTEXT_SWITCH_BREAKPOINT_REACHED_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_COUNT, "d2h event invalid parameter count: {}", d2h_notification_message->header.parameter_count); - CHECK(d2h_notification_message->header.payload_length == + CHECK_COMMON_STATUS(d2h_notification_message->header.payload_length == sizeof(d2h_notification_message->message_parameters.context_switch_breakpoint_reached_event), HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_LENGTH, "d2h event invalid payload_length: {}", d2h_notification_message->header.payload_length); @@ -400,11 +403,11 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_run_time_error_not const char *run_time_error_status_text = NULL; uint32_t run_time_error_status = 0; - CHECK(D2H_EVENT_CONTEXT_SWITCH_RUN_TIME_ERROR_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, + CHECK_COMMON_STATUS(D2H_EVENT_CONTEXT_SWITCH_RUN_TIME_ERROR_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_COUNT, "d2h event invalid parameter count: {}", d2h_notification_message->header.parameter_count); - CHECK(d2h_notification_message->header.payload_length == + CHECK_COMMON_STATUS(d2h_notification_message->header.payload_length == sizeof(d2h_notification_message->message_parameters.context_switch_run_time_error_event), HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_LENGTH, "d2h event invalid payload_length: {}", d2h_notification_message->header.payload_length); @@ -412,7 +415,7 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_run_time_error_not run_time_error_status = d2h_notification_message->message_parameters.context_switch_run_time_error_event.exit_status; status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)run_time_error_status, &run_time_error_status_text); - CHECK((HAILO_COMMON_STATUS__SUCCESS == status), status, + 
CHECK_COMMON_STATUS((HAILO_COMMON_STATUS__SUCCESS == status), status, "Cannot find textual address for run time status {:#x}, status = {}", (FIRMWARE_STATUS_t)run_time_error_status, status); LOGGER__ERROR("Got Context switch run time error on net_group index {}, batch index {}, context index {}, action index {} with status {}", diff --git a/hailort/libhailort/src/device_common/device.cpp b/hailort/libhailort/src/device_common/device.cpp index 57f6fb39..71224599 100644 --- a/hailort/libhailort/src/device_common/device.cpp +++ b/hailort/libhailort/src/device_common/device.cpp @@ -387,7 +387,7 @@ hailo_status Device::set_sleep_state(hailo_sleep_state_t sleep_state) return Control::set_sleep_state(*this, sleep_state); } -hailo_status Device::dma_map(void *address, size_t size, hailo_stream_direction_t direction) +hailo_status Device::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) { (void) address; (void) size; @@ -395,21 +395,14 @@ hailo_status Device::dma_map(void *address, size_t size, hailo_stream_direction_ return HAILO_NOT_IMPLEMENTED; } -hailo_status Device::dma_unmap(void *address, hailo_stream_direction_t direction) +hailo_status Device::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) { (void) address; + (void) size; (void) direction; return HAILO_NOT_IMPLEMENTED; } -Expected> Device::try_dma_map(vdma::DmaAbleBufferPtr buffer, - hailo_stream_direction_t direction) -{ - (void) buffer; - (void) direction; - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - hailo_status Device::direct_write_memory(uint32_t address, const void *buffer, uint32_t size) { (void) address; @@ -538,7 +531,7 @@ Expected> Device::get_number_of_dynamic_contexts_per_networ } Expected Device::download_context_action_list(uint32_t network_group_id, uint8_t context_type, - uint8_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size) + uint16_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size) { CHECK_ARG_NOT_NULL_AS_EXPECTED(base_address); CHECK_ARG_NOT_NULL_AS_EXPECTED(batch_counter); @@ -575,7 +568,7 @@ hailo_status Device::set_context_action_list_timestamp_batch(uint16_t batch_inde hailo_status Device::set_context_switch_breakpoint(uint8_t breakpoint_id, bool break_at_any_network_group_index, uint8_t network_group_index, bool break_at_any_batch_index, uint16_t batch_index, bool break_at_any_context_index, - uint8_t context_index, bool break_at_any_action_index, uint16_t action_index) + uint16_t context_index, bool break_at_any_action_index, uint16_t action_index) { CONTROL_PROTOCOL__context_switch_breakpoint_data_t breakpoint_data = { break_at_any_network_group_index, diff --git a/hailort/libhailort/src/device_common/device_internal.cpp b/hailort/libhailort/src/device_common/device_internal.cpp index eca7a3a7..1fa6d72e 100644 --- a/hailort/libhailort/src/device_common/device_internal.cpp +++ b/hailort/libhailort/src/device_common/device_internal.cpp @@ -15,7 +15,7 @@ #include "device_common/device_internal.hpp" #include "network_group/network_group_internal.hpp" #include "utils/sensor_config_utils.hpp" - +#include "hef/hef_internal.hpp" namespace hailort { @@ -26,7 +26,8 @@ DeviceBase::DeviceBase(Type type) : m_d2h_notification_thread(), m_notif_fetch_thread_params(make_shared_nothrow()), m_d2h_callbacks{{0,0}}, - m_callbacks_lock() + m_callbacks_lock(), + m_is_shutdown_core_ops_called(false) // TODO: Handle m_notif_fetch_thread_params null pointer { #ifndef NDEBUG @@ -565,8 +566,17 @@ void 
DeviceBase::d2h_notification_thread_main(const std::string &device_id) continue; } - hailo_notification_t callback_notification; uint32_t notification_fw_id = notification.header.event_id; + + if (HEALTH_MONITOR_CLOSED_STREAMS_D2H_EVENT_ID == notification_fw_id) { + if (!m_is_shutdown_core_ops_called) { + LOGGER__WARNING("Aborting Infer, Device {} got closed streams notification from \'Health Monitor\'", device_id); + shutdown_core_ops(); + m_is_shutdown_core_ops_called = true; + } + } + + hailo_notification_t callback_notification; hailo_notification_id_t hailo_notification_id; hailo_status status = fw_notification_id_to_hailo((D2H_EVENT_ID_t)notification_fw_id, &hailo_notification_id); if (HAILO_SUCCESS != status) { @@ -600,9 +610,10 @@ hailo_status DeviceBase::check_hef_is_compatible(Hef &hef) const auto device_arch = get_architecture(); CHECK_EXPECTED_AS_STATUS(device_arch, "Can't get device architecture (is the FW loaded?)"); - if (!is_hef_compatible(device_arch.value(), hef.pimpl->get_device_arch())) { + if (!is_hef_compatible(device_arch.value(), static_cast(hef.pimpl->get_device_arch()))) { auto device_arch_str = HailoRTCommon::get_device_arch_str(device_arch.value()); - auto hef_arch_str = HailoRTCommon::get_device_arch_str(hef_arch_to_device_arch(hef.pimpl->get_device_arch())); + auto hef_arch_str = + HailoRTCommon::get_device_arch_str(hef_arch_to_device_arch(static_cast(hef.pimpl->get_device_arch()))); LOGGER__ERROR("HEF format is not compatible with device. Device arch: {}, HEF arch: {}", device_arch_str.c_str(), hef_arch_str.c_str()); @@ -615,16 +626,19 @@ hailo_status DeviceBase::check_hef_is_compatible(Hef &hef) CHECK_EXPECTED_AS_STATUS(extended_device_info_expected, "Can't get device extended info"); hailo_extended_device_information_t extended_device_information = extended_device_info_expected.release(); check_clock_rate_for_hailo8(extended_device_information.neural_network_core_clock_rate, - hef.pimpl->get_device_arch()); + static_cast(hef.pimpl->get_device_arch())); } - if ((ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == hef.pimpl->get_device_arch()) && (HAILO_ARCH_HAILO8 == device_arch.value())) { - LOGGER__WARNING( - "HEF was compiled for Hailo8L device, while the device itself is Hailo8. " \ - "This will result in lower performance."); + if ((static_cast(HEFHwArch::HW_ARCH__HAILO8L) == hef.pimpl->get_device_arch()) && + (HAILO_ARCH_HAILO8 == device_arch.value())) { + LOGGER__WARNING("HEF was compiled for Hailo8L device, while the device itself is Hailo8. " \ + "This will result in lower performance."); + } else if ((static_cast(HEFHwArch::HW_ARCH__HAILO15M) == hef.pimpl->get_device_arch()) && + (HAILO_ARCH_HAILO15H == device_arch.value())) { + LOGGER__WARNING("HEF was compiled for Hailo15M device, while the device itself is Hailo15H. 
" \ + "This will result in lower performance."); } - return HAILO_SUCCESS; } @@ -714,46 +728,46 @@ hailo_status DeviceBase::validate_fw_version_for_platform(const hailo_device_ide return validate_binary_version_for_platform(&fw_version, &min_supported_fw_version, fw_binary_type); } -bool DeviceBase::is_hef_compatible(hailo_device_architecture_t device_arch, ProtoHEFHwArch hef_arch) +bool DeviceBase::is_hef_compatible(hailo_device_architecture_t device_arch, HEFHwArch hef_arch) { switch (device_arch) { case HAILO_ARCH_HAILO8: - return (hef_arch == PROTO__HW_ARCH__HAILO8P) || (hef_arch == PROTO__HW_ARCH__HAILO8R) || (hef_arch == PROTO__HW_ARCH__HAILO8L); + return (hef_arch == HEFHwArch::HW_ARCH__HAILO8P) || (hef_arch == HEFHwArch::HW_ARCH__HAILO8R) || (hef_arch == HEFHwArch::HW_ARCH__HAILO8L); case HAILO_ARCH_HAILO8L: - return (hef_arch == PROTO__HW_ARCH__HAILO8L); + return (hef_arch == HEFHwArch::HW_ARCH__HAILO8L); case HAILO_ARCH_HAILO15H: // Compare with HW_ARCH__LAVENDER and HW_ARCH__GINGER to support hefs compiled for them - return (hef_arch == PROTO__HW_ARCH__GINGER) || (hef_arch == PROTO__HW_ARCH__LAVENDER) || - (hef_arch == PROTO__HW_ARCH__HAILO15H) || (hef_arch == PROTO__HW_ARCH__HAILO15M); + return (hef_arch == HEFHwArch::HW_ARCH__GINGER) || (hef_arch == HEFHwArch::HW_ARCH__LAVENDER) || + (hef_arch == HEFHwArch::HW_ARCH__HAILO15H) || (hef_arch == HEFHwArch::HW_ARCH__HAILO15M); case HAILO_ARCH_PLUTO: - return (hef_arch == PROTO__HW_ARCH__PLUTO); + return (hef_arch == HEFHwArch::HW_ARCH__PLUTO); case HAILO_ARCH_HAILO15M: - return (hef_arch == PROTO__HW_ARCH__HAILO15M); + return (hef_arch == HEFHwArch::HW_ARCH__HAILO15M); default: return false; } } -hailo_device_architecture_t DeviceBase::hef_arch_to_device_arch(ProtoHEFHwArch hef_arch) +hailo_device_architecture_t DeviceBase::hef_arch_to_device_arch(HEFHwArch hef_arch) { switch (hef_arch) { - case PROTO__HW_ARCH__SAGE_A0: + case HEFHwArch::HW_ARCH__SAGE_A0: return HAILO_ARCH_HAILO8_A0; - case PROTO__HW_ARCH__HAILO8: - case PROTO__HW_ARCH__HAILO8P: - case PROTO__HW_ARCH__HAILO8R: - case PROTO__HW_ARCH__SAGE_B0: - case PROTO__HW_ARCH__PAPRIKA_B0: + case HEFHwArch::HW_ARCH__HAILO8: + case HEFHwArch::HW_ARCH__HAILO8P: + case HEFHwArch::HW_ARCH__HAILO8R: + case HEFHwArch::HW_ARCH__SAGE_B0: + case HEFHwArch::HW_ARCH__PAPRIKA_B0: return HAILO_ARCH_HAILO8; - case PROTO__HW_ARCH__HAILO8L: + case HEFHwArch::HW_ARCH__HAILO8L: return HAILO_ARCH_HAILO8L; - case PROTO__HW_ARCH__HAILO15H: - case PROTO__HW_ARCH__GINGER: - case PROTO__HW_ARCH__LAVENDER: + case HEFHwArch::HW_ARCH__HAILO15H: + case HEFHwArch::HW_ARCH__GINGER: + case HEFHwArch::HW_ARCH__LAVENDER: return HAILO_ARCH_HAILO15H; - case PROTO__HW_ARCH__PLUTO: + case HEFHwArch::HW_ARCH__PLUTO: return HAILO_ARCH_PLUTO; - case PROTO__HW_ARCH__HAILO15M: + case HEFHwArch::HW_ARCH__HAILO15M: return HAILO_ARCH_HAILO15M; default: @@ -761,9 +775,9 @@ hailo_device_architecture_t DeviceBase::hef_arch_to_device_arch(ProtoHEFHwArch h } } -void DeviceBase::check_clock_rate_for_hailo8(uint32_t clock_rate, ProtoHEFHwArch hef_hw_arch) +void DeviceBase::check_clock_rate_for_hailo8(uint32_t clock_rate, HEFHwArch hef_hw_arch) { - uint32_t expected_clock_rate = (hef_hw_arch == ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8R) ? HAILO8R_CLOCK_RATE : HAILO8_CLOCK_RATE; + uint32_t expected_clock_rate = (hef_hw_arch == HEFHwArch::HW_ARCH__HAILO8R) ? 
HAILO8R_CLOCK_RATE : HAILO8_CLOCK_RATE; if (expected_clock_rate != clock_rate) { LOGGER__WARNING( "HEF was compiled assuming clock rate of {} MHz, while the device clock rate is {} MHz. " \ diff --git a/hailort/libhailort/src/device_common/device_internal.hpp b/hailort/libhailort/src/device_common/device_internal.hpp index 8ffe7671..29ac623e 100644 --- a/hailort/libhailort/src/device_common/device_internal.hpp +++ b/hailort/libhailort/src/device_common/device_internal.hpp @@ -23,7 +23,6 @@ #include "hailo/hailort.h" #include "d2h_event_queue.hpp" -#include "hef/hef_internal.hpp" #include "firmware_header.h" #include "firmware_header_utils.h" @@ -43,6 +42,23 @@ namespace hailort #define CLOCKS_IN_MHZ (1000 * 1000) +enum class HEFHwArch // Must be aligned to ProtoHEFHwArch +{ + HW_ARCH__HAILO8 = 0, + HW_ARCH__HAILO8P = 1, + HW_ARCH__HAILO8R = 2, + HW_ARCH__HAILO8L = 3, + HW_ARCH__HAILO15H = 103, + HW_ARCH__HAILO15M = 4, + + HW_ARCH__SAGE_A0 = 100, + HW_ARCH__SAGE_B0 = 101, + HW_ARCH__PAPRIKA_B0 = 102, + HW_ARCH__GINGER = 104, + HW_ARCH__LAVENDER = 105, + HW_ARCH__PLUTO = 106, +}; + class DeviceBase : public Device { public: @@ -81,7 +97,7 @@ class DeviceBase : public Device virtual Expected read_user_config() override; virtual hailo_status write_user_config(const MemoryView &buffer) override; virtual hailo_status erase_user_config() override; - static hailo_device_architecture_t hef_arch_to_device_arch(ProtoHEFHwArch hef_arch); + static hailo_device_architecture_t hef_arch_to_device_arch(HEFHwArch hef_arch); virtual Expected get_architecture() const override { @@ -101,6 +117,7 @@ class DeviceBase : public Device // Special value to signal the d2h notification thread to terminate static const uint32_t TERMINATE_EVENT_ID = std::numeric_limits::max(); + virtual void shutdown_core_ops() = 0; virtual hailo_reset_device_mode_t get_default_reset_mode() = 0; virtual hailo_status reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) = 0; virtual Expected read_notification() = 0; @@ -126,8 +143,8 @@ class DeviceBase : public Device firmware_version_t *min_supported_binary_version, FW_BINARY_TYPE_t fw_binary_type); static hailo_status validate_fw_version_for_platform(const hailo_device_identity_t &board_info, firmware_version_t fw_version, FW_BINARY_TYPE_t fw_binary_type); - static bool is_hef_compatible(hailo_device_architecture_t device_arch, ProtoHEFHwArch hw_arch); - static void check_clock_rate_for_hailo8(uint32_t clock_rate, ProtoHEFHwArch hef_hw_arch); + static bool is_hef_compatible(hailo_device_architecture_t device_arch, HEFHwArch hw_arch); + static void check_clock_rate_for_hailo8(uint32_t clock_rate, HEFHwArch hef_hw_arch); hailo_status store_sensor_control_buffers(const std::vector &control_buffers, uint32_t section_index, hailo_sensor_types_t sensor_type, uint32_t reset_config_size, uint16_t config_height, uint16_t config_width, uint16_t config_fps, const std::string &config_name); virtual void notification_fetch_thread(std::shared_ptr params); @@ -140,6 +157,7 @@ class DeviceBase : public Device d2h_notification_callback_t m_d2h_callbacks[HAILO_NOTIFICATION_ID_COUNT]; std::mutex m_callbacks_lock; + bool m_is_shutdown_core_ops_called; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/eth/eth_device.cpp b/hailort/libhailort/src/eth/eth_device.cpp index 764c87b8..c7685a50 100644 --- a/hailort/libhailort/src/eth/eth_device.cpp +++ b/hailort/libhailort/src/eth/eth_device.cpp @@ -20,6 +20,7 @@ #include "eth/udp.hpp" #include "device_common/control.hpp" #include 
"network_group/network_group_internal.hpp" +#include "hef/hef_internal.hpp" #include #include @@ -304,6 +305,17 @@ hailo_reset_device_mode_t EthernetDevice::get_default_reset_mode() return HAILO_RESET_DEVICE_MODE_CHIP; } +// TODO - HRT-13234, move to DeviceBase +void EthernetDevice::shutdown_core_ops() +{ + for (auto core_op : m_core_ops) { + auto status = core_op->shutdown(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to shutdown core op with status {}", status); + } + } +} + hailo_status EthernetDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) { hailo_status status = HAILO_UNINITIALIZED; diff --git a/hailort/libhailort/src/eth/eth_device.hpp b/hailort/libhailort/src/eth/eth_device.hpp index fca41f1b..880be87b 100644 --- a/hailort/libhailort/src/eth/eth_device.hpp +++ b/hailort/libhailort/src/eth/eth_device.hpp @@ -30,6 +30,7 @@ class EthernetDevice : public DeviceBase { virtual Expected read_log(MemoryView &buffer, hailo_cpu_id_t cpu_id) override; virtual hailo_status wait_for_wakeup() override; virtual void increment_control_sequence() override; + virtual void shutdown_core_ops() override; virtual hailo_reset_device_mode_t get_default_reset_mode() override; virtual hailo_status reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) override; @@ -72,6 +73,7 @@ class EthernetDevice : public DeviceBase { const hailo_eth_device_info_t m_device_info; std::string m_device_id; Udp m_control_udp; + // TODO - HRT-13234, move to DeviceBase std::vector> m_core_ops; std::vector> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context ActiveCoreOpHolder m_active_core_op_holder; diff --git a/hailort/libhailort/src/eth/eth_stream.cpp b/hailort/libhailort/src/eth/eth_stream.cpp index 61164ebd..b84db336 100644 --- a/hailort/libhailort/src/eth/eth_stream.cpp +++ b/hailort/libhailort/src/eth/eth_stream.cpp @@ -134,7 +134,7 @@ Expected EthernetInputStream::sync_write_raw_buffer(const MemoryView &bu size_t size = buffer.size(); status = m_udp.send((uint8_t*)buffer.data(), &size, this->configuration.use_dataflow_padding, this->configuration.max_payload_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Udp send was aborted!"); return make_unexpected(status); } @@ -157,7 +157,7 @@ hailo_status EthernetInputStream::write_impl(const MemoryView &buffer) } else { status = eth_stream__write_all_no_sync(buffer.data(), offset, buffer.size()); } - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("eth_stream__write_all was aborted!"); return status; } @@ -191,7 +191,7 @@ hailo_status EthernetInputStream::eth_stream__write_with_remainder(const void *b while (offset < offset_end_without_remainder) { transfer_size = offset_end_without_remainder - offset; auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, transfer_size)); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { + if (HAILO_STREAM_ABORT == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); } @@ -200,7 +200,7 @@ hailo_status EthernetInputStream::eth_stream__write_with_remainder(const void *b } if (0 < remainder_size) { auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, remainder_size)); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { + if (HAILO_STREAM_ABORT 
== expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); } @@ -236,7 +236,7 @@ hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(co transfer_size = offset_end_without_remainder - offset; auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, transfer_size)); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { + if (HAILO_STREAM_ABORT == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); } @@ -249,7 +249,7 @@ hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(co (void)token_bucket.consumeWithBorrowAndWait(static_cast(remainder_size), rate_bytes_per_sec, BURST_SIZE); auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, remainder_size)); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { + if (HAILO_STREAM_ABORT == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); } @@ -316,7 +316,7 @@ hailo_status EthernetInputStream::eth_stream__write_all_with_sync(const void *bu for (size_t i = 0; i < number_of_frames; i++) { // Write frame by frame, whereas the remainder packet is the sync packet status = eth_stream__write_with_remainder(buffer, offset, frame_size, this->configuration.sync_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("eth_stream__write_with_remainder was aborted!"); return status; } @@ -515,7 +515,7 @@ hailo_status EthernetOutputStream::read_all_no_sync(void *buffer, size_t offset, transfer_size = offset_end - offset; MemoryView buffer_view(static_cast(buffer) + offset, transfer_size); auto expected_bytes_read = this->sync_read_raw_buffer(buffer_view); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_read.status()) { + if (HAILO_STREAM_ABORT == expected_bytes_read.status()) { LOGGER__INFO("sync_read_raw_buffer was aborted!"); return expected_bytes_read.status(); } @@ -555,7 +555,7 @@ hailo_status EthernetOutputStream::read_all_with_sync(void *buffer, size_t offse status = expected_bytes_read.status(); if (HAILO_TIMEOUT == status) { return handle_timeout(buffer, offset, initial_offset, frame_size); - } else if (HAILO_STREAM_ABORTED_BY_USER == status) { + } else if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("sync_read_raw_buffer was aborted"); return status; } else if (HAILO_SUCCESS != status) { @@ -658,7 +658,7 @@ hailo_status EthernetOutputStream::read_impl(MemoryView buffer) } else { status = this->read_all_no_sync(buffer.data(), 0, buffer.size()); } - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("read was aborted!"); return status; } @@ -676,7 +676,7 @@ Expected EthernetOutputStream::sync_read_raw_buffer(MemoryView &buffer) auto buffer_size = buffer.size(); status = m_udp.recv((uint8_t*)buffer.data(),&buffer_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Udp recv was aborted!"); return make_unexpected(status); } diff --git a/hailort/libhailort/src/eth/network_rate_calculator.cpp b/hailort/libhailort/src/eth/network_rate_calculator.cpp index 2d7def5b..76937d8f 100644 --- a/hailort/libhailort/src/eth/network_rate_calculator.cpp +++ b/hailort/libhailort/src/eth/network_rate_calculator.cpp @@ 
-117,10 +117,10 @@ Expected> NetworkUdpRateCalculator::calculate_in total_input_rate, total_output_rate, max_supported_bandwidth); if (total_output_rate > total_input_rate) { // Output is bigger than max rate. Adjusting input rate accordingly - auto input_output_ratio = (total_input_rate / total_output_rate); + double input_output_ratio = ((double)total_input_rate / total_output_rate); LOGGER__WARNING("Output Bps ({}) is bigger than input Bps ({}) output (ratio is: {})", total_output_rate, total_input_rate, input_output_ratio); - max_supported_bandwidth *= input_output_ratio; + max_supported_bandwidth = static_cast(input_output_ratio * max_supported_bandwidth); } auto total_inputs_rate_to_max_supported_ratio = (static_cast(max_supported_bandwidth) / total_input_rate); for (auto &rate_pair : input_rates) { diff --git a/hailort/libhailort/src/eth/udp.cpp b/hailort/libhailort/src/eth/udp.cpp index af97e7b9..1d820e48 100644 --- a/hailort/libhailort/src/eth/udp.cpp +++ b/hailort/libhailort/src/eth/udp.cpp @@ -129,7 +129,7 @@ hailo_status Udp::send(uint8_t *buffer, size_t *size, bool use_padding, size_t m status = m_socket.send_to((const uint8_t*)send_ptr, *size, MSG_CONFIRM, (const struct sockaddr *) &m_device_address, m_device_address_length, &number_of_sent_bytes); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Socket send_to was aborted!"); return status; } @@ -161,7 +161,7 @@ hailo_status Udp::recv(uint8_t *buffer, size_t *size) status = m_socket.recv_from(buffer, *size, 0, (struct sockaddr *) &m_device_address, m_device_address_length, &number_of_received_bytes); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Socket recv_from was aborted!"); return status; } diff --git a/hailort/libhailort/src/hailort.cpp b/hailort/libhailort/src/hailort.cpp index 6f1faa88..96b1ce20 100644 --- a/hailort/libhailort/src/hailort.cpp +++ b/hailort/libhailort/src/hailort.cpp @@ -36,6 +36,7 @@ #include "vdevice/vdevice_internal.hpp" #include "utils/profiler/tracer_macros.hpp" #include "utils/exported_resource_manager.hpp" +#include "utils/buffer_storage.hpp" #include #include @@ -46,9 +47,10 @@ using namespace hailort; // Note: Async stream API uses BufferPtr as a param. When exporting BufferPtrs to the user via c-api, they must be // stored in some container, otherwise their ref count may reach zero and they will be freed, despite the // c-api user still using them. (shared_ptr doesn't have a release method like unique_ptr) -// Singleton holding a mapping between the address of a buffer allocated/mapped via hailo_allocate_buffer/hailo_dma_map_buffer_to_device +// Singleton holding a mapping between the address of a buffer allocated/mapped via hailo_allocate_buffer // to the underlying BufferPtr. When a buffer is freed via hailo_free_buffer, the BufferPtr object will be removed from // the storage. 
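
Worth spelling out the network_rate_calculator.cpp fix above: with unsigned integer operands, total_input_rate / total_output_rate truncates toward zero, so whenever the output rate exceeded the input rate the ratio became 0 and max_supported_bandwidth was zeroed out. A standalone sketch with made-up rates (the values are illustrative, not taken from the patch):

    #include <cstdint>
    #include <iostream>

    int main() {
        // Hypothetical rates in Bps
        uint32_t total_input_rate = 300000000;
        uint32_t total_output_rate = 400000000;
        uint32_t max_supported_bandwidth = 850000000;

        // Old behavior: unsigned division truncates 0.75 down to 0
        uint32_t truncated_ratio = total_input_rate / total_output_rate;

        // Fixed behavior: promote to double first, then scale and cast back
        double ratio = static_cast<double>(total_input_rate) / total_output_rate;
        max_supported_bandwidth = static_cast<uint32_t>(ratio * max_supported_bandwidth);

        std::cout << truncated_ratio << "\n";          // prints 0
        std::cout << max_supported_bandwidth << "\n";  // prints 637500000
        return 0;
    }
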
+// TODO HRT-12726: remove the export manager using ExportedBufferManager = ExportedResourceManager; COMPAT__INITIALIZER(hailort__initialize_logger) @@ -1090,11 +1092,11 @@ hailo_status hailo_allocate_buffer(size_t size, const hailo_buffer_parameters_t CHECK_ARG_NOT_NULL(buffer_out); CHECK(0 != size, HAILO_INVALID_ARGUMENT, "Buffer size must be greater than zero"); - auto buffer_storage_params = BufferStorageParams::create(*allocation_params); - CHECK_EXPECTED_AS_STATUS(buffer_storage_params); + BufferStorageParams buffer_storage_params{}; + buffer_storage_params.flags = allocation_params->flags; // Create buffer - auto buffer = Buffer::create_shared(size, *buffer_storage_params); + auto buffer = Buffer::create_shared(size, buffer_storage_params); CHECK_EXPECTED_AS_STATUS(buffer); // Store the buffer in manager (otherwise it'll be freed at the end of this func) @@ -1112,45 +1114,34 @@ hailo_status hailo_free_buffer(void *buffer) return ExportedBufferManager::unregister_resource(buffer); } -// TODO: hailo_dma_map_buffer_to_device/hailo_dma_unmap_buffer_from_device aren't thread safe when crossed with +// TODO: hailo_device_dma_map_buffer/hailo_device_dma_unmap_buffer aren't thread safe when crossed with // hailo_allocate_buffer/hailo_free_buffer (HRT-10669) -hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction) +hailo_status hailo_device_dma_map_buffer(hailo_device device,void *address, size_t size, hailo_dma_buffer_direction_t direction) { - CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(device); - - auto hailort_allocated_buffer = ExportedBufferManager::get_resource(buffer); - if (hailort_allocated_buffer) { - // TODO: this will change here HRT-10983 - // The buffer has been allocated by hailort - // The mapping is held by the Buffer object - auto mapping_result = hailort_allocated_buffer->get()->storage().dma_map(*reinterpret_cast(device), direction); - CHECK_EXPECTED_AS_STATUS(mapping_result); - const auto is_new_mapping = mapping_result.value(); - return is_new_mapping ? HAILO_SUCCESS : HAILO_DMA_MAPPING_ALREADY_EXISTS; - } - - // The buffer has been allocated by the user - return reinterpret_cast(device)->dma_map(buffer, size, - (HAILO_DMA_BUFFER_DIRECTION_H2D == direction) ? HAILO_H2D_STREAM : HAILO_D2H_STREAM); + CHECK_ARG_NOT_NULL(address); + return reinterpret_cast(device)->dma_map(address, size, direction); } -hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction) +hailo_status hailo_device_dma_unmap_buffer(hailo_device device, void *address, size_t size, hailo_dma_buffer_direction_t direction) { - CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(device); + CHECK_ARG_NOT_NULL(address); + return reinterpret_cast(device)->dma_unmap(address, size, direction); +} - auto hailort_allocated_buffer = ExportedBufferManager::get_resource(buffer); - if (hailort_allocated_buffer) { - // TODO: mappings get dtor'd when the Buffer object is dtor'd. 
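
For orientation, a hedged usage sketch of the renamed mapping API above: the caller now hands the raw address, size, and a hailo_dma_buffer_direction_t directly to the device, and unmaps with the same triple. Here `device` is assumed to be a valid hailo_device obtained elsewhere, and error handling is schematic:

    #include "hailo/hailort.h"
    #include <cstdint>
    #include <vector>

    hailo_status map_use_unmap(hailo_device device, size_t frame_size)
    {
        std::vector<uint8_t> frame(frame_size);

        // Map the user buffer once, before the transfer loop
        hailo_status status = hailo_device_dma_map_buffer(device, frame.data(), frame.size(),
            HAILO_DMA_BUFFER_DIRECTION_H2D);
        if (HAILO_SUCCESS != status) {
            return status;
        }

        // ... async stream writes using frame.data() would go here ...

        // Unmap with the same address/size/direction used for mapping
        return hailo_device_dma_unmap_buffer(device, frame.data(), frame.size(),
            HAILO_DMA_BUFFER_DIRECTION_H2D);
    }
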
- // We want all the mapping to be held in one place for hailort::Buffers and for user alloacted buffers - // so this will change (HRT-10983) - return HAILO_SUCCESS; - } +hailo_status hailo_vdevice_dma_map_buffer(hailo_vdevice vdevice,void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + CHECK_ARG_NOT_NULL(vdevice); + CHECK_ARG_NOT_NULL(address); + return reinterpret_cast(vdevice)->dma_map(address, size, direction); +} - // The buffer has been allocated by the user - return reinterpret_cast(device)->dma_unmap(buffer, - (HAILO_DMA_BUFFER_DIRECTION_H2D == direction) ? HAILO_H2D_STREAM : HAILO_D2H_STREAM); +hailo_status hailo_vdevice_dma_unmap_buffer(hailo_vdevice vdevice, void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + CHECK_ARG_NOT_NULL(vdevice); + CHECK_ARG_NOT_NULL(address); + return reinterpret_cast(vdevice)->dma_unmap(address, size, direction); } hailo_status hailo_calculate_eth_input_rate_limits(hailo_hef hef, const char *network_group_name, uint32_t fps, @@ -1318,20 +1309,7 @@ hailo_status hailo_stream_read_raw_buffer_async(hailo_output_stream stream, void CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(callback); - auto buffer_ref = ExportedBufferManager::get_resource(buffer); - if (HAILO_NOT_FOUND == buffer_ref.status()) { - // User addr (buffer hasn't been allocated by hailo_allocate_buffer) - return (reinterpret_cast(stream))->read_async(buffer, size, - wrap_c_user_callback(callback, opaque)); - } - - // buffer has been allocated by hailo_allocate_buffer - CHECK_EXPECTED_AS_STATUS(buffer_ref); - auto buffer_ptr = buffer_ref->get(); - assert(buffer_ptr != nullptr); - CHECK(size == buffer_ptr->size(), HAILO_INVALID_ARGUMENT); - - return (reinterpret_cast(stream))->read_async(buffer_ptr, + return (reinterpret_cast(stream))->read_async(buffer, size, wrap_c_user_callback(callback, opaque)); } @@ -1342,20 +1320,7 @@ hailo_status hailo_stream_write_raw_buffer_async(hailo_input_stream stream, cons CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(callback); - auto buffer_ref = ExportedBufferManager::get_resource(const_cast(buffer)); - if (HAILO_NOT_FOUND == buffer_ref.status()) { - // User addr (buffer hasn't been allocated by hailo_allocate_buffer) - return (reinterpret_cast(stream))->write_async(buffer, size, - wrap_c_user_callback(callback, opaque)); - } - - // buffer has been allocated by hailo_allocate_buffer - CHECK_EXPECTED_AS_STATUS(buffer_ref); - auto buffer_ptr = buffer_ref->get(); - assert(buffer_ptr != nullptr); - CHECK(size == buffer_ptr->size(), HAILO_INVALID_ARGUMENT); - - return (reinterpret_cast(stream))->write_async(buffer_ptr, + return (reinterpret_cast(stream))->write_async(buffer, size, wrap_c_user_callback(callback, opaque)); } @@ -2222,16 +2187,24 @@ hailo_status hailo_vstream_write_raw_buffer(hailo_input_vstream input_vstream, c CHECK_ARG_NOT_NULL(buffer); auto status = reinterpret_cast(input_vstream)->write(MemoryView::create_const(buffer, buffer_size)); + if (HAILO_STREAM_ABORT == status) { + return status; + } CHECK_SUCCESS(status); return HAILO_SUCCESS; } hailo_status hailo_vstream_write_pix_buffer(hailo_input_vstream input_vstream, const hailo_pix_buffer_t *buffer) { + CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer->memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!"); + CHECK_ARG_NOT_NULL(input_vstream); CHECK_ARG_NOT_NULL(buffer); auto status = reinterpret_cast(input_vstream)->write(*buffer); + if (HAILO_STREAM_ABORT == status) { + return status; + } CHECK_SUCCESS(status); 
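
The vstream hunks here stop logging HAILO_STREAM_ABORT as an error and simply propagate it, since an abort is the expected way to unblock pending transfers during deactivation or shutdown. A sketch of the calling pattern this enables (vstream, frame, and keep_running are placeholders, not names from the patch):

    #include "hailo/hailort.h"
    #include <atomic>
    #include <cstdint>
    #include <vector>

    hailo_status write_loop(hailo_input_vstream vstream, const std::vector<uint8_t> &frame,
        std::atomic<bool> &keep_running)
    {
        while (keep_running) {
            auto status = hailo_vstream_write_raw_buffer(vstream, frame.data(), frame.size());
            if (HAILO_STREAM_ABORT == status) {
                break;  // stream aborted (e.g. deactivation) - a graceful stop, not a failure
            }
            if (HAILO_SUCCESS != status) {
                return status;  // a real error
            }
        }
        return HAILO_SUCCESS;
    }
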
return HAILO_SUCCESS; } @@ -2240,8 +2213,11 @@ hailo_status hailo_vstream_read_raw_buffer(hailo_output_vstream output_vstream, { CHECK_ARG_NOT_NULL(output_vstream); CHECK_ARG_NOT_NULL(dst); - + auto status = reinterpret_cast(output_vstream)->read(MemoryView(dst, dst_size)); + if (HAILO_STREAM_ABORT == status) { + return status; + } CHECK_SUCCESS(status); return HAILO_SUCCESS; } diff --git a/hailort/libhailort/src/hef/context_switch_actions.cpp b/hailort/libhailort/src/hef/context_switch_actions.cpp index b060466f..db6e9c2a 100644 --- a/hailort/libhailort/src/hef/context_switch_actions.cpp +++ b/hailort/libhailort/src/hef/context_switch_actions.cpp @@ -10,6 +10,7 @@ #include "context_switch_actions.hpp" #include "core_op/resource_manager/resource_manager.hpp" +#include "hef/hef_internal.hpp" #include "context_switch_defs.h" @@ -171,22 +172,60 @@ Expected DeactivateConfigChannelAction::serialize_params(const ContextRe return Buffer::create(reinterpret_cast(¶ms), sizeof(params)); } -Expected WriteDataCcwAction::create( +Expected WriteDataCcwActionByBuffer::create( Buffer &&data, uint8_t config_stream_index, size_t total_ccw_burst) { CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(total_ccw_burst), HAILO_INVALID_HEF, "Too many ccw burst {} (must fit in uint16)", total_ccw_burst); - auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataCcwAction( + auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataCcwActionByBuffer( std::move(data), config_stream_index, static_cast(total_ccw_burst))); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } -WriteDataCcwAction::WriteDataCcwAction(Buffer &&data, uint8_t config_stream_index, uint16_t total_ccw_burst) : +hailo_status WriteDataCcwActionByBuffer::write_to_config_buffer(ConfigBuffer& config_buffer, bool should_support_pre_fetch) +{ + bool is_last_write = config_buffer.size_left() == size(); + if (should_support_pre_fetch && is_last_write) { + auto status = config_buffer.pad_with_nops(); + CHECK_SUCCESS(status); + } + + auto status = config_buffer.write(MemoryView(m_data)); + CHECK_SUCCESS(status); + + if (should_support_pre_fetch && is_last_write) { + auto desc_count = config_buffer.program_descriptors(); + CHECK_EXPECTED_AS_STATUS(desc_count); + } + + return HAILO_SUCCESS; +} + +Expected WriteDataCcwAction::create(uint32_t offset, size_t size, uint8_t config_stream_index, + size_t total_ccw_burst, std::shared_ptr shef_file_handle) +{ + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(total_ccw_burst), HAILO_INVALID_HEF, + "Too many ccw burst {} (must fit in uint16)", total_ccw_burst); + auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataCcwAction( + offset, size, config_stream_index, static_cast(total_ccw_burst), shef_file_handle)); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + return result; +} + +WriteDataCcwActionByBuffer::WriteDataCcwActionByBuffer(Buffer &&data, uint8_t config_stream_index, uint16_t total_ccw_burst) : + WriteDataCcwAction(0, 0, config_stream_index, total_ccw_burst, nullptr), + m_data(std::move(data)) +{} + +WriteDataCcwAction::WriteDataCcwAction(uint32_t offset, size_t size, uint8_t config_stream_index, uint16_t total_ccw_burst, + std::shared_ptr shef_file_handle) : ContextSwitchConfigAction(Type::WriteDataCcw), - m_data(std::move(data)), + m_offset(offset), + m_size(size), m_config_stream_index(config_stream_index), - m_total_ccw_burst(total_ccw_burst) + m_total_ccw_burst(total_ccw_burst), + m_shef_file_handle(shef_file_handle) {} Expected> 
WriteDataCcwAction::serialize(const ContextResources &) const @@ -207,6 +246,24 @@ Expected WriteDataCcwAction::serialize_params(const ContextResources &) return make_unexpected(HAILO_NOT_IMPLEMENTED); } +hailo_status WriteDataCcwAction::write_to_config_buffer(ConfigBuffer& config_buffer, bool should_support_pre_fetch) +{ + bool is_last_write = config_buffer.size_left() == size(); + + auto buffer = m_shef_file_handle->read(m_offset, m_size); + CHECK_EXPECTED_AS_STATUS(buffer); + + auto status = config_buffer.write(MemoryView(buffer.value())); + CHECK_SUCCESS(status); + + if (should_support_pre_fetch && is_last_write) { + auto desc_count = config_buffer.program_descriptors(); + CHECK_EXPECTED_AS_STATUS(desc_count); + } + + return HAILO_SUCCESS; +} + Expected AddCcwBurstAction::create(uint8_t config_stream_index, uint16_t ccw_bursts) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) AddCcwBurstAction(config_stream_index, ccw_bursts)); diff --git a/hailort/libhailort/src/hef/context_switch_actions.hpp b/hailort/libhailort/src/hef/context_switch_actions.hpp index 63170949..31ab7d4b 100644 --- a/hailort/libhailort/src/hef/context_switch_actions.hpp +++ b/hailort/libhailort/src/hef/context_switch_actions.hpp @@ -19,6 +19,7 @@ #include "device_common/control_protocol.hpp" #include "context_switch_defs.h" +#include "core_op/resource_manager/config_buffer.hpp" namespace hailort @@ -155,11 +156,12 @@ class DeactivateConfigChannelAction : public ContextSwitchConfigAction const vdma::ChannelId m_channel_id; }; +class ShefFileHandle; class WriteDataCcwAction : public ContextSwitchConfigAction { public: - static Expected create(Buffer &&data, uint8_t config_stream_index, - size_t total_ccw_burst); + static Expected create(uint32_t offset, size_t size, uint8_t config_stream_index, + size_t total_ccw_burst, std::shared_ptr shef_file_handle); WriteDataCcwAction(WriteDataCcwAction &&) = default; WriteDataCcwAction(const WriteDataCcwAction &) = delete; WriteDataCcwAction &operator=(WriteDataCcwAction &&) = delete; @@ -170,17 +172,41 @@ class WriteDataCcwAction : public ContextSwitchConfigAction virtual bool supports_repeated_block() const override; virtual Expected serialize_params(const ContextResources &context_resources) const override; - const MemoryView data() const { return MemoryView::create_const(m_data.data(), m_data.size()); } uint8_t config_stream_index() const { return m_config_stream_index; } uint16_t total_ccw_burst() const { return m_total_ccw_burst; } + virtual size_t size() const { return m_size; } + virtual hailo_status write_to_config_buffer(ConfigBuffer& config_buffer, bool should_support_pre_fetch); + +protected: + WriteDataCcwAction(uint32_t offset, size_t size, uint8_t config_stream_index, + uint16_t total_ccw_burst, std::shared_ptr shef_file_handle); + + uint32_t m_offset; + size_t m_size; + const uint8_t m_config_stream_index; + const uint16_t m_total_ccw_burst; + std::shared_ptr m_shef_file_handle; +}; + +class WriteDataCcwActionByBuffer : public WriteDataCcwAction +{ +public: + static Expected create(Buffer &&data, uint8_t config_stream_index, + size_t total_ccw_burst); + WriteDataCcwActionByBuffer(WriteDataCcwActionByBuffer &&) = default; + WriteDataCcwActionByBuffer(const WriteDataCcwActionByBuffer &) = delete; + WriteDataCcwActionByBuffer &operator=(WriteDataCcwActionByBuffer &&) = delete; + WriteDataCcwActionByBuffer &operator=(const WriteDataCcwActionByBuffer &) = delete; + virtual ~WriteDataCcwActionByBuffer() = default; + + virtual size_t size() const override 
{ return m_data.size(); } + virtual hailo_status write_to_config_buffer(ConfigBuffer& config_buffer, bool should_support_pre_fetch) override; private: - WriteDataCcwAction(Buffer &&data, uint8_t config_stream_index, + WriteDataCcwActionByBuffer(Buffer &&data, uint8_t config_stream_index, uint16_t total_ccw_burst); Buffer m_data; - const uint8_t m_config_stream_index; - const uint16_t m_total_ccw_burst; }; class AddCcwBurstAction : public ContextSwitchConfigAction diff --git a/hailort/libhailort/src/hef/core_op_metadata.cpp b/hailort/libhailort/src/hef/core_op_metadata.cpp index c600123d..b700dd8a 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.cpp +++ b/hailort/libhailort/src/hef/core_op_metadata.cpp @@ -8,7 +8,6 @@ **/ #include "core_op_metadata.hpp" -#include "hef_internal.hpp" #include namespace hailort @@ -52,9 +51,10 @@ static bool is_edge_under_mux(const LayerInfo &info, const std::string &edge_nam } ContextMetadata::ContextMetadata(std::vector &&actions, - ConfigBufferInfoMap&& config_buffers_info) : + ConfigBufferInfoMap&& config_buffers_info, bool const_input_layer_found) : m_actions(std::move(actions)), - m_config_buffers_info(std::move(config_buffers_info)) + m_config_buffers_info(std::move(config_buffers_info)), + m_const_input_layer_found(const_input_layer_found) {} const ConfigBufferInfoMap &ContextMetadata::config_buffers_info() const @@ -67,6 +67,11 @@ const std::vector &ContextMetadata::get_actions() return m_actions; } +bool ContextMetadata::const_input_layer_found() const +{ + return m_const_input_layer_found; +} + std::vector ContextMetadata::get_actions_of_type( const std::set &action_types) const { @@ -183,12 +188,15 @@ CoreOpMetadata::CoreOpMetadata(const std::string &core_op_name, std::vector &&dynamic_contexts, std::vector &&config_channels_info, SupportedFeatures &supported_features, - std::vector sorted_network_names) + std::vector sorted_network_names, + bool can_fast_batch_switch) : m_preliminary_context(std::move(preliminary_context)), m_dynamic_contexts(std::move(dynamic_contexts)), m_config_channels_info(std::move(config_channels_info)), m_core_op_name(core_op_name), m_supported_features(supported_features), - m_sorted_network_names(sorted_network_names) {} + m_sorted_network_names(sorted_network_names), + m_can_fast_batch_switch(can_fast_batch_switch) + {} std::vector CoreOpMetadata::get_input_layer_infos() const { @@ -375,30 +383,83 @@ Expected NetworkGroupMetadata::create(const std::string &n SupportedFeatures &supported_features, const std::vector &sorted_network_names, std::vector &ops_metadata) { - auto all_layers_infos = get_all_layer_infos(core_ops_metadata_per_arch); - CHECK_EXPECTED(all_layers_infos); + return NetworkGroupMetadata(network_group_name, std::move(core_ops_metadata_per_arch), sorted_output_names, + supported_features, sorted_network_names, ops_metadata); +} + +Expected NetworkGroupMetadata::get_core_op_metadata() const +/* This function is used for names getters (such as get_vstream_names_from_stream_name), + so should be same across all clusters layouts */ +{ + CHECK_AS_EXPECTED(1 == m_core_ops_metadata_per_arch.size(), HAILO_INTERNAL_FAILURE); + auto core_op_metadata_exp = m_core_ops_metadata_per_arch.begin()->second.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE); + CHECK_EXPECTED(core_op_metadata_exp); + + auto core_op_metadata = core_op_metadata_exp.release(); + return core_op_metadata; +} + +Expected> NetworkGroupMetadata::get_all_layer_infos() const +{ + auto core_op_metadata = get_core_op_metadata(); + 
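
Backing up to the WriteDataCcwAction split above: the base class now streams CCW payloads from the HEF file on demand (offset plus size, through ShefFileHandle), while WriteDataCcwActionByBuffer keeps serving the pre-loaded Buffer. A simplified standalone sketch of that shape, with stand-in types rather than the real HailoRT classes:

    #include <cstdint>
    #include <fstream>
    #include <vector>

    // Stand-in for the two CCW data sources: file-backed (lazy read) vs in-memory.
    struct CcwSource {
        virtual ~CcwSource() = default;
        virtual size_t size() const = 0;
        virtual std::vector<uint8_t> fetch() = 0;  // called when filling the config buffer
    };

    struct FileBackedCcw : CcwSource {  // analogous to WriteDataCcwAction
        std::ifstream &file;
        uint32_t offset;
        size_t len;
        FileBackedCcw(std::ifstream &f, uint32_t off, size_t n) : file(f), offset(off), len(n) {}
        size_t size() const override { return len; }
        std::vector<uint8_t> fetch() override {
            std::vector<uint8_t> buf(len);
            file.seekg(offset);  // the real code also adds the CCW-section base offset
            file.read(reinterpret_cast<char *>(buf.data()), len);
            return buf;
        }
    };

    struct MemoryBackedCcw : CcwSource {  // analogous to WriteDataCcwActionByBuffer
        std::vector<uint8_t> data;
        explicit MemoryBackedCcw(std::vector<uint8_t> &&d) : data(std::move(d)) {}
        size_t size() const override { return data.size(); }
        std::vector<uint8_t> fetch() override { return data; }
    };

The payoff is that write_to_config_buffer stays structurally identical in both paths: it asks size() up front to detect the last write (padding with NOPs and programming descriptors when pre-fetch is supported), and only pulls the bytes when they are actually written.
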
CHECK_EXPECTED(core_op_metadata); + + return core_op_metadata.value()->get_all_layer_infos(); +} + +Expected> NetworkGroupMetadata::get_input_layer_infos(const std::string &network_name) const +{ + auto core_op_metadata = get_core_op_metadata(); + CHECK_EXPECTED(core_op_metadata); + + return core_op_metadata.value()->get_input_layer_infos(network_name); +} + +Expected> NetworkGroupMetadata::get_output_layer_infos(const std::string &network_name) const +{ + auto core_op_metadata = get_core_op_metadata(); + CHECK_EXPECTED(core_op_metadata); + + return core_op_metadata.value()->get_output_layer_infos(network_name); +} + +Expected> NetworkGroupMetadata::get_input_vstream_infos(const std::string &network_name) const +{ + auto input_layer_infos = get_input_layer_infos(network_name); + CHECK_EXPECTED(input_layer_infos); std::vector input_vstream_infos; + for (auto &layer_info : input_layer_infos.value()) { + auto vstreams_info = LayerInfoUtils::get_vstream_infos_from_layer_info(layer_info); + input_vstream_infos.insert(input_vstream_infos.end(), + std::make_move_iterator(vstreams_info.begin()), std::make_move_iterator(vstreams_info.end())); + } + CHECK_AS_EXPECTED(0 != input_vstream_infos.size(), HAILO_NOT_FOUND, "No VStreams were found for network {}", network_name); + + return input_vstream_infos; +} + +Expected> NetworkGroupMetadata::get_output_vstream_infos(const std::string &network_name) const +{ + auto output_layer_infos = get_output_layer_infos(network_name); + CHECK_EXPECTED(output_layer_infos); std::vector output_vstream_infos; - for (auto &layer_info : all_layers_infos.value()) { - if (std::any_of(ops_metadata.begin(), ops_metadata.end(), + for (auto &layer_info : output_layer_infos.value()) { + if (std::any_of(m_ops_metadata.begin(), m_ops_metadata.end(), [&layer_info](auto &op_metadata) { return contains(op_metadata->get_input_names(), layer_info.name); })) { continue; // all output_vstream_infos that relates to the op are coming from the op itself instead of layer_infos } + auto vstreams_info = LayerInfoUtils::get_vstream_infos_from_layer_info(layer_info); - if (HAILO_D2H_STREAM == layer_info.direction) { - // In case of fused nms layers, several LayerInfos will contain data about the same fused layer - for (auto &vstream_info : vstreams_info) { - if (!LayerInfoUtils::vstream_info_already_in_vector(output_vstream_infos, vstream_info.name)) { - output_vstream_infos.push_back(vstream_info); - } + // In case of fused nms layers, several LayerInfos will contain data about the same fused layer + for (auto &vstream_info : vstreams_info) { + if (!LayerInfoUtils::vstream_info_already_in_vector(output_vstream_infos, vstream_info.name)) { + output_vstream_infos.push_back(vstream_info); } - } else { - input_vstream_infos.insert(input_vstream_infos.end(), - std::make_move_iterator(vstreams_info.begin()), std::make_move_iterator(vstreams_info.end())); } } - for (auto &metadata : ops_metadata) { + for (auto &metadata : m_ops_metadata) { auto vstream_info = metadata->get_output_vstream_info(); CHECK_EXPECTED(vstream_info); output_vstream_infos.push_back(vstream_info.release()); @@ -407,18 +468,18 @@ Expected NetworkGroupMetadata::create(const std::string &n // Sort vstream infos by sorted_output_names hailo_status status = HAILO_SUCCESS; std::sort(output_vstream_infos.begin(), output_vstream_infos.end(), - [&sorted_output_names, &status](const auto &info1, const auto &info2) + [this, &status](const auto &info1, const auto &info2) { - const auto index1 = std::find(sorted_output_names.begin(), 
sorted_output_names.end(), std::string(info1.name)); - const auto index2 = std::find(sorted_output_names.begin(), sorted_output_names.end(), std::string(info2.name)); + const auto index1 = std::find(m_sorted_output_names.begin(), m_sorted_output_names.end(), std::string(info1.name)); + const auto index2 = std::find(m_sorted_output_names.begin(), m_sorted_output_names.end(), std::string(info2.name)); - if (sorted_output_names.end() == index1) { + if (m_sorted_output_names.end() == index1) { LOGGER__ERROR("VStream {} not found in sorted output names", info1.name); status = HAILO_INTERNAL_FAILURE; return false; } - if (sorted_output_names.end() == index2) { + if (m_sorted_output_names.end() == index2) { LOGGER__ERROR("VStream {} not found in sorted output names", info2.name); status = HAILO_INTERNAL_FAILURE; return false; @@ -428,34 +489,9 @@ Expected NetworkGroupMetadata::create(const std::string &n }); CHECK_SUCCESS_AS_EXPECTED(status); - return NetworkGroupMetadata(network_group_name, std::move(core_ops_metadata_per_arch), sorted_output_names, supported_features, sorted_network_names, - input_vstream_infos, output_vstream_infos, ops_metadata); -} - -Expected> NetworkGroupMetadata::get_input_vstream_infos(const std::string &network_name) const -{ - std::vector res; - for (auto &vstream_info : m_input_vstreams_infos) { - if ((network_name == std::string(vstream_info.network_name)) || (network_name.empty()) || (network_name == default_network_name())) { - res.push_back(vstream_info); - } - } - CHECK_AS_EXPECTED(0 != res.size(), HAILO_NOT_FOUND, "No VStreams where found for network {}", network_name); - - return res; -} - -Expected> NetworkGroupMetadata::get_output_vstream_infos(const std::string &network_name) const -{ - std::vector res; - for (auto &vstream_info : m_output_vstreams_infos) { - if ((network_name == std::string(vstream_info.network_name)) || (network_name.empty()) || (network_name == default_network_name())) { - res.push_back(vstream_info); - } - } - CHECK_AS_EXPECTED(0 != res.size(), HAILO_NOT_FOUND, "No VStreams where found for network {}", network_name); + CHECK_AS_EXPECTED(0 != output_vstream_infos.size(), HAILO_NOT_FOUND, "No VStreams were found for network {}", network_name); - return res; + return output_vstream_infos; } Expected> NetworkGroupMetadata::get_all_vstream_infos(const std::string &network_name) const @@ -486,7 +522,7 @@ Expected> NetworkGroupMetadata::get_vstream_names_from_ } } - auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch); + auto all_layers_infos = get_all_layer_infos(); CHECK_EXPECTED(all_layers_infos); for (auto &layer_info : all_layers_infos.release()) { if (layer_info.is_multi_planar) { @@ -521,7 +557,7 @@ Expected> NetworkGroupMetadata::get_stream_names_from_v } } - auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch); + auto all_layers_infos = get_all_layer_infos(); CHECK_EXPECTED(all_layers_infos); for (auto &layer_info : all_layers_infos.value()) { if (layer_info.is_mux) { diff --git a/hailort/libhailort/src/hef/core_op_metadata.hpp b/hailort/libhailort/src/hef/core_op_metadata.hpp index 73a3f4d0..d00ca896 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.hpp +++ b/hailort/libhailort/src/hef/core_op_metadata.hpp @@ -12,7 +12,7 @@ #include "hef/layer_info.hpp" #include "hef/context_switch_actions.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/op_metadata.hpp" namespace hailort @@ -31,6 +31,7 @@ struct SupportedFeatures { bool output_scale_by_feature = false; 
bool periph_calculation_in_hailort = false; bool core_hw_padding_config_in_dfc = false; + bool batch_register_config = false; }; // For each config_stream_index we store vector of all ccw write length. The vector is used to build the config buffer. @@ -40,7 +41,7 @@ using ConfigBufferInfoMap = std::unordered_map>; class ContextMetadata final { public: ContextMetadata(std::vector &&actions, - ConfigBufferInfoMap&& config_buffers_info); + ConfigBufferInfoMap&& config_buffers_info, bool const_input_layer_found); const std::vector &get_actions() const; std::vector get_actions_of_type( @@ -61,9 +62,12 @@ class ContextMetadata final { Expected get_layers_transfer_size(const std::vector &layer_infos) const; Expected get_context_transfer_size() const; + + bool const_input_layer_found() const; private: std::vector m_actions; ConfigBufferInfoMap m_config_buffers_info; + bool m_const_input_layer_found; std::vector m_boundary_input_layers; std::vector m_boundary_output_layers; @@ -84,7 +88,8 @@ class CoreOpMetadata final { std::vector &&dynamic_contexts, std::vector &&config_channels_info, SupportedFeatures &supported_features, - std::vector sorted_network_names); + std::vector sorted_network_names, + bool can_fast_batch_switch); std::vector get_input_layer_infos() const; std::vector get_output_layer_infos() const; @@ -125,6 +130,11 @@ class CoreOpMetadata final { return m_sorted_network_names; } + bool get_can_fast_batch_switch() const + { + return m_can_fast_batch_switch; + } + private: // TODO: Remove const std::string default_network_name() const @@ -138,6 +148,7 @@ class CoreOpMetadata final { std::string m_core_op_name; SupportedFeatures m_supported_features; std::vector m_sorted_network_names; + bool m_can_fast_batch_switch; }; using CoreOpMetadataPtr = std::shared_ptr; @@ -168,15 +179,11 @@ class NetworkGroupMetadata final { std::vector &sorted_output_names, SupportedFeatures &supported_features, const std::vector &sorted_network_names, - std::vector &input_vstreams_infos, - std::vector &output_vstreams_infos, std::vector &ops_metadata) : m_network_group_name(network_group_name), m_sorted_output_names(sorted_output_names), m_supported_features(supported_features), m_sorted_network_names(sorted_network_names), - m_input_vstreams_infos(input_vstreams_infos), - m_output_vstreams_infos(output_vstreams_infos), m_core_ops_metadata_per_arch(std::move(core_ops_metadata_per_arch)), m_ops_metadata(ops_metadata) {}; @@ -216,25 +223,16 @@ class NetworkGroupMetadata final { } private: - static Expected> get_all_layer_infos(std::map &core_ops_metadata_per_arch) - /* This function is used for names getters (such as get_vstream_names_from_stream_name), - so should be same across all clusters layouts */ - { - CHECK_AS_EXPECTED(1 == core_ops_metadata_per_arch.size(), HAILO_INTERNAL_FAILURE); - auto core_op_metadata = core_ops_metadata_per_arch.begin()->second.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE); - CHECK_EXPECTED(core_op_metadata); - - return core_op_metadata.value()->get_all_layer_infos(); - } + Expected get_core_op_metadata() const; + Expected> get_all_layer_infos() const; + Expected> get_input_layer_infos(const std::string &network_name) const; + Expected> get_output_layer_infos(const std::string &network_name) const; std::string m_network_group_name; std::vector m_sorted_output_names; SupportedFeatures m_supported_features; std::vector m_sorted_network_names; - std::vector m_input_vstreams_infos; - std::vector m_output_vstreams_infos; - std::map m_core_ops_metadata_per_arch; // Key is core_op_name 
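
One more note on the core_op_metadata.cpp changes above: the output vstream infos are ordered by where each name appears in m_sorted_output_names, and any name missing from that list is reported as an internal failure. A minimal sketch of this sort-by-declared-order idiom on plain strings (simplified types, not the real hailo_vstream_info_t):

    #include <algorithm>
    #include <string>
    #include <vector>

    // Sorts `items` by the position of each entry in `order`; returns false if an
    // item is missing from `order` (mirrors the HAILO_INTERNAL_FAILURE path above).
    bool sort_by_reference_order(std::vector<std::string> &items,
        const std::vector<std::string> &order)
    {
        bool all_found = true;
        std::sort(items.begin(), items.end(),
            [&](const std::string &a, const std::string &b) {
                const auto ia = std::find(order.begin(), order.end(), a);
                const auto ib = std::find(order.begin(), order.end(), b);
                if ((order.end() == ia) || (order.end() == ib)) {
                    all_found = false;  // unknown name, reported after the sort
                    return false;
                }
                return ia < ib;
            });
        return all_found;
    }
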
std::vector m_ops_metadata; diff --git a/hailort/libhailort/src/hef/hef.cpp b/hailort/libhailort/src/hef/hef.cpp index 7e959ec2..389af5be 100644 --- a/hailort/libhailort/src/hef/hef.cpp +++ b/hailort/libhailort/src/hef/hef.cpp @@ -24,6 +24,7 @@ #include "net_flow/ops/nms_post_process.hpp" #include "net_flow/ops/yolov5_post_process.hpp" +#include "net_flow/ops/yolov5_bbox_only_post_process.hpp" #include "net_flow/ops/yolox_post_process.hpp" #include "net_flow/ops/ssd_post_process.hpp" #include "net_flow/ops/argmax_post_process.hpp" @@ -36,6 +37,8 @@ #include "eth/hcp_config_core_op.hpp" #include "hef/layer_info.hpp" #include "device_common/control.hpp" +#include "hw_consts.hpp" +#include "utils/profiler/tracer_macros.hpp" #include "byte_order.h" #include "context_switch_defs.h" @@ -49,6 +52,7 @@ #include #include #include +#include namespace hailort @@ -280,7 +284,7 @@ Expected Hef::get_bottleneck_fps(const std::string &net_group_name) c Expected Hef::get_hef_device_arch() const { - return DeviceBase::hef_arch_to_device_arch(pimpl->get_device_arch()); + return DeviceBase::hef_arch_to_device_arch(static_cast(pimpl->get_device_arch())); } Expected Hef::device_arch_to_string(const hailo_device_architecture_t arch) @@ -320,6 +324,37 @@ Expected> Hef::get_vstream_names_from_stream_name(const return pimpl->get_vstream_names_from_stream_name(stream_name, net_group_name_str); } +ShefFileHandle::ShefFileHandle(const std::string &hef_path, uint32_t ccws_buffer_offset) + : m_hef_path(hef_path), m_ccws_buffer_offset(ccws_buffer_offset) {} + +hailo_status ShefFileHandle::open() +{ + m_hef_file = std::ifstream(m_hef_path, std::ios::in | std::ios::binary); + CHECK(m_hef_file.is_open(), HAILO_OPEN_FILE_FAILURE, "Failed to open HEF file \"{}\". errno: {}", m_hef_path, errno); + return HAILO_SUCCESS; +} + +Expected ShefFileHandle::read(uint32_t offset, size_t size) +{ + auto buffer = Buffer::create(size); + CHECK_EXPECTED(buffer); + + m_hef_file.seekg(m_ccws_buffer_offset + offset, m_hef_file.beg); + CHECK_AS_EXPECTED(m_hef_file.good(), HAILO_FILE_OPERATION_FAILURE, "Seeking in file failed"); + + m_hef_file.read(reinterpret_cast(buffer->data()), size); + CHECK_AS_EXPECTED(m_hef_file.good(), HAILO_FILE_OPERATION_FAILURE, "Failed reading ccw"); + + return buffer; +} + +hailo_status ShefFileHandle::close() +{ + m_hef_file.close(); + CHECK(m_hef_file.good(), HAILO_CLOSE_FAILURE, "Closing file failed"); + return HAILO_SUCCESS; +} + Expected Hef::Impl::create(const std::string &hef_path) { hailo_status status = HAILO_UNINITIALIZED; @@ -358,7 +393,7 @@ static hailo_status calc_istream_md5(std::ifstream &s, MD5_SUM_t &calculated_md5 while (!s.eof()) { s.read(md5_buffer, HEF__MD5_BUFFER_SIZE); CHECK(!s.bad(), HAILO_FILE_OPERATION_FAILURE, "ifstream::read() failed"); - MD5_Update(&md5, &md5_buffer, static_cast(s.gcount())); + MD5_Update(&md5, &md5_buffer, s.gcount()); } MD5_Final(calculated_md5, &md5); @@ -369,14 +404,15 @@ static hailo_status calc_istream_md5(std::ifstream &s, MD5_SUM_t &calculated_md5 return HAILO_SUCCESS; } -hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, MD5_SUM_t &calculated_md5, size_t proto_size) +hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, MD5_SUM_t &calculated_md5, size_t hef_file_residue_size) { - CHECK(HEADER_MAGIC == BYTE_ORDER__htonl(header.magic), HAILO_INVALID_HEF, + CHECK(HEADER_MAGIC == header.magic, HAILO_INVALID_HEF, "HEF magic does not match. 
detected magic - {:x}", header.magic); - CHECK(HEADER_VERSION == BYTE_ORDER__htonl(header.version), HAILO_INVALID_HEF, "HEF version does not match"); + auto version = header.version; + CHECK((HEADER_VERSION_0 == version) || (HEADER_VERSION_1 == version), HAILO_INVALID_HEF, "HEF version does not match"); - CHECK(proto_size == BYTE_ORDER__htonl(header.hef_proto_length), HAILO_INVALID_HEF, + CHECK(hef_file_residue_size == header.hef_proto_size + header.ccws_size, HAILO_INVALID_HEF, "HEF file length does not match"); CHECK(0 == memcmp(&calculated_md5, &header.expected_md5, sizeof(MD5_SUM_t)), HAILO_INVALID_HEF, @@ -406,6 +442,13 @@ void Hef::Impl::init_md5(MD5_SUM_t &calculated_md5) memcpy(m_md5, calculated_md5, sizeof(m_md5)); } +void Hef::Impl::clear_hef_buffer() +{ +#ifdef HAILO_SUPPORT_MULTI_PROCESS + m_hef_buffer = Buffer(); +#endif // HAILO_SUPPORT_MULTI_PROCESS +} + hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path) { #ifdef HAILO_SUPPORT_MULTI_PROCESS @@ -418,23 +461,36 @@ hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path) CHECK(hef_file.is_open(), HAILO_OPEN_FILE_FAILURE, "Failed to open HEF file \"{}\". errno: {}", hef_path, errno); hef__header_t header = {}; - hef_file.read((char*)&header, sizeof(header)); + hef_file.read(reinterpret_cast(&header), sizeof(header)); CHECK(hef_file.good(), HAILO_FILE_OPERATION_FAILURE, "Failed reading HEF header"); - auto proto_size = get_istream_size(hef_file); - CHECK_EXPECTED_AS_STATUS(proto_size); + header.magic = BYTE_ORDER__htonl(header.magic); + header.version = BYTE_ORDER__htonl(header.version); + header.hef_proto_size = BYTE_ORDER__htonl(header.hef_proto_size); + header.ccws_size = BYTE_ORDER__htonl(header.ccws_size); + + auto hef_file_residue_size = get_istream_size(hef_file); + CHECK_EXPECTED_AS_STATUS(hef_file_residue_size); MD5_SUM_t calculated_md5 = {}; auto status = calc_istream_md5(hef_file, calculated_md5); CHECK_SUCCESS(status); - status = validate_hef_header(header, calculated_md5, proto_size.value()); + status = validate_hef_header(header, calculated_md5, hef_file_residue_size.value()); CHECK_SUCCESS(status); + if (HEADER_VERSION_1 == header.version) { + auto ptr = make_shared_nothrow(hef_path, + static_cast(sizeof(header) + header.hef_proto_size)); + CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); + m_shef_file_handle = ptr; + } + init_md5(calculated_md5); ProtoHEFHef hef_message; - auto rb = hef_message.ParseFromIstream(&hef_file); + google::protobuf::io::IstreamInputStream zero_copy_input(&hef_file); + auto rb = hef_message.ParseFromBoundedZeroCopyStream(&zero_copy_input, header.hef_proto_size); CHECK(rb, HAILO_INVALID_HEF, "Failed parsing HEF file"); status = transfer_protobuf_field_ownership(hef_message); CHECK_SUCCESS(status); @@ -448,6 +504,7 @@ hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path) status = validate_hef_extensions(); CHECK_SUCCESS(status); + TRACE(HefLoadedTrace, hef_path, m_header.sdk_version(), m_md5); return HAILO_SUCCESS; } @@ -460,7 +517,12 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) #endif // HAILO_SUPPORT_MULTI_PROCESS CHECK(hef_memview.size() >= sizeof(hef__header_t), HAILO_INVALID_HEF, "Invalid HEF header"); - const hef__header_t &header = reinterpret_cast(*hef_memview.data()); + const hef__header_t &raw_header = reinterpret_cast(*hef_memview.data()); + auto header = raw_header; + header.magic = BYTE_ORDER__htonl(header.magic); + header.version = BYTE_ORDER__htonl(header.version); + header.hef_proto_size = 
BYTE_ORDER__htonl(header.hef_proto_size); + header.ccws_size = BYTE_ORDER__htonl(header.ccws_size); auto proto_buffer = (hef_memview.data() + sizeof(header)); auto proto_size = (hef_memview.size() - sizeof(header)); @@ -664,19 +726,24 @@ static Expected> parse_config_channels_info(const Expected Hef::Impl::create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op, const std::vector &sorted_network_names) { - auto preliminary_context = HefUtils::parse_preliminary_context(core_op.preliminary_config, m_supported_features); + auto preliminary_context = HefUtils::parse_preliminary_context(core_op.preliminary_config, m_supported_features, m_shef_file_handle); CHECK_EXPECTED(preliminary_context); - auto dynamic_contexts = HefUtils::parse_dynamic_contexts(core_op, m_supported_features, get_device_arch()); + auto dynamic_contexts = HefUtils::parse_dynamic_contexts(core_op, m_supported_features, get_device_arch(), m_shef_file_handle); CHECK_EXPECTED(dynamic_contexts); - auto config_channels_info = parse_config_channels_info(core_op); + auto config_channels_info = parse_config_channels_info(core_op); CHECK_EXPECTED(config_channels_info); + // If const input layer is found in the preliminary context, or first dynamic context we can't use fast batch switch + const auto can_fast_batch_switch = + !(preliminary_context.value().const_input_layer_found() || dynamic_contexts.value()[0].const_input_layer_found()); + // Currently, CoreOp name is the same as network_group_name, thats why we init it with it. // TODO: HRT-9551 - Change it when supporting multi core ops. auto metadata_per_arch = make_shared_nothrow(core_op.network_group_metadata.network_group_name(), - preliminary_context.release(), dynamic_contexts.release(), config_channels_info.release(), m_supported_features, sorted_network_names); + preliminary_context.release(), dynamic_contexts.release(), config_channels_info.release(), + m_supported_features, sorted_network_names, can_fast_batch_switch); CHECK_NOT_NULL_AS_EXPECTED(metadata_per_arch, HAILO_OUT_OF_HOST_MEMORY); return metadata_per_arch; } @@ -792,7 +859,7 @@ const MemoryView Hef::Impl::get_hef_memview() } #endif // HAILO_SUPPORT_MULTI_PROCESS -Hef::Impl::Impl(const std::string &hef_path, hailo_status &status) +Hef::Impl::Impl(const std::string &hef_path, hailo_status &status) : m_shef_file_handle(nullptr) { status = HAILO_UNINITIALIZED; GOOGLE_PROTOBUF_VERIFY_VERSION; @@ -806,7 +873,7 @@ Hef::Impl::Impl(const std::string &hef_path, hailo_status &status) status = HAILO_SUCCESS; } -Hef::Impl::Impl(const MemoryView &hef_memview, hailo_status &status) +Hef::Impl::Impl(const MemoryView &hef_memview, hailo_status &status) : m_shef_file_handle(nullptr) { status = HAILO_UNINITIALIZED; GOOGLE_PROTOBUF_VERIFY_VERSION; @@ -852,6 +919,8 @@ SupportedFeatures Hef::Impl::get_supported_features(const ProtoHEFHeader &header header, hef_extensions, included_features); supported_features.core_hw_padding_config_in_dfc = check_hef_optional_extension(ProtoHEFExtensionType::HW_PADDING, header, hef_optional_extensions); + supported_features.batch_register_config = check_hef_extension(ProtoHEFExtensionType::BATCH_REGISTER_CONFIG, + header, hef_extensions, included_features); return supported_features; } @@ -865,6 +934,7 @@ net_flow::NmsPostProcessConfig create_post_process_nms_config(const ProtoHEFOp & nms_config.number_of_classes = op_proto.nms_op().classes(); nms_config.background_removal = op_proto.nms_op().background_removal(); nms_config.background_removal_index = op_proto.nms_op().background_removal_index(); 
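Taken together, the header changes above give a version-1 (SHEF) file a simple on-disk layout: the fixed header, then the serialized protobuf, then the raw CCW section that ShefFileHandle reads lazily. A minimal sketch of the offset arithmetic, using only the hef__header_t fields shown in this diff (the helper name is hypothetical; the fields are stored big-endian, hence the BYTE_ORDER__htonl swaps in parse_hef_file):

// Illustrative sketch only - mirrors the ccws_buffer_offset passed to ShefFileHandle.
// v1 layout: [ header | protobuf (hef_proto_size bytes) | CCW section (ccws_size bytes) ]
static uint32_t shef_ccws_offset(const hef__header_t &header_in_host_order)
{
    // validate_hef_header() has already verified that hef_proto_size + ccws_size
    // accounts for every byte after the header, so this is where the CCWs start.
    return static_cast<uint32_t>(sizeof(hef__header_t) + header_in_host_order.hef_proto_size);
}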
@@ -865,6 +934,7 @@ net_flow::NmsPostProcessConfig create_post_process_nms_config(const ProtoHEFOp &
     nms_config.number_of_classes = op_proto.nms_op().classes();
     nms_config.background_removal = op_proto.nms_op().background_removal();
     nms_config.background_removal_index = op_proto.nms_op().background_removal_index();
+    nms_config.bbox_only = op_proto.nms_op().bbox_decoding_only();

     return nms_config;
 }
@@ -913,6 +983,21 @@ Expected<std::unordered_map<std::string, net_flow::BufferMetaData>> create_inputs_metadata(const
     return inputs_metadata;
 }

+uint32_t compute_num_of_proposals(const std::unordered_map<std::string, net_flow::BufferMetaData> &inputs_metadatas,
+    std::map<std::string, std::vector<int>> &anchors)
+{
+    uint32_t num_of_proposals = 0;
+    for (const auto &input_metadata_pair : inputs_metadatas) {
+        auto &name = input_metadata_pair.first;
+        auto &input_metadata = input_metadata_pair.second;
+        assert(contains(anchors, name));
+        auto &layer_anchors = anchors.at(name);
+        auto num_of_anchors = net_flow::YOLOv5PostProcessOp::get_num_of_anchors(layer_anchors);
+        num_of_proposals += static_cast<uint32_t>(num_of_anchors * input_metadata.shape.height * input_metadata.shape.width);
+    }
+    return num_of_proposals;
+}
+
 Expected<net_flow::PostProcessOpMetadataPtr> create_yolov5_op_metadata(const ProtoHEFOp &op_proto,
     const std::map<size_t, LayerInfo> &pad_index_to_streams_info, const std::map<size_t, size_t> &input_to_output_pads,
     const std::string &network_name)
@@ -929,13 +1014,41 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV5);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV5,
+        nms_config.bbox_only);
+
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     return net_flow::Yolov5OpMetadata::create(inputs_metadata.release(), outputs_metadata, nms_config, yolo_config.release(),
         network_name);
 }

+Expected<net_flow::PostProcessOpMetadataPtr> create_yolov5_bbox_only_op_metadata(const ProtoHEFOp &op_proto,
+    const std::map<size_t, LayerInfo> &pad_index_to_streams_info, const std::map<size_t, size_t> &input_to_output_pads,
+    const std::string &network_name)
+{
+    auto nms_config = create_post_process_nms_config(op_proto);
+
+    auto yolo_v5_config = create_yolov5_config(op_proto.nms_op().yolo_nms_op().bbox_decoders(),
+        op_proto.nms_op().yolo_nms_op().image_height(), op_proto.nms_op().yolo_nms_op().image_width(), pad_index_to_streams_info);
+    CHECK_EXPECTED(yolo_v5_config);
+
+    auto inputs_metadata = create_inputs_metadata(op_proto, pad_index_to_streams_info, input_to_output_pads);
+    CHECK_EXPECTED(inputs_metadata);
+
+    std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
+    net_flow::BufferMetaData output_metadata{};
+    uint32_t num_of_proposals = compute_num_of_proposals(inputs_metadata.value(), yolo_v5_config->anchors);
+    output_metadata.shape = {1, num_of_proposals, YOLOV5_BBOX_NUM_OF_VALUES + op_proto.nms_op().classes()};
+
+    output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV5, nms_config.bbox_only);
+    outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});
+
+    return net_flow::Yolov5BboxOnlyOpMetadata::create(inputs_metadata.release(), outputs_metadata, nms_config, yolo_v5_config.release(),
+        network_name);
+}
+
 Expected<net_flow::PostProcessOpMetadataPtr> create_yolov5_seg_op_metadata(const ProtoHEFOp &op_proto,
     const std::map<size_t, LayerInfo> &pad_index_to_streams_info, const std::map<size_t, size_t> &input_to_output_pads,
     const std::string &network_name)
@@ -950,15 +1063,19 @@
     auto proto_layer_name = op_proto.nms_op().yolo_seg_op().proto_info().proto_layer();
     CHECK_AS_EXPECTED(contains(inputs_metadata.value(), proto_layer_name), HAILO_INVALID_HEF);

-    net_flow::YoloV5SegPostProcessConfig yolov5_seg_config =
-        {static_cast<float32_t>(op_proto.nms_op().yolo_seg_op().mask_threshold()),
-        op_proto.nms_op().yolo_seg_op().proto_info().proto_layer()};
+
+    const uint32_t SIZE_FACTOR = 2;
+    net_flow::YoloV5SegPostProcessConfig yolov5_seg_config = {};
+    yolov5_seg_config.mask_threshold = static_cast<float32_t>(op_proto.nms_op().yolo_seg_op().mask_threshold());
+    yolov5_seg_config.max_accumulated_mask_size = static_cast<uint32_t>(
+        yolov5_config->image_height * yolov5_config->image_width * SIZE_FACTOR);
+    yolov5_seg_config.proto_layer_name = proto_layer_name;

     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type
         ({ HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK, HAILO_FORMAT_FLAGS_NONE },
-        net_flow::OperationType::YOLOV5SEG);
+        net_flow::OperationType::YOLOV5SEG, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     return net_flow::Yolov5SegOpMetadata::create(inputs_metadata.release(), outputs_metadata, nms_config, yolov5_config.release(),
@@ -979,7 +1096,7 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV8);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV8, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     for (auto &bbox_proto : op_proto.nms_op().yolov8_nms_op().bbox_decoders()) {
@@ -1024,7 +1141,7 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOX);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOX, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     for (const auto &bbox_proto : op_proto.nms_op().yolox_nms_op().bbox_decoders()) {
@@ -1100,7 +1217,7 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::SSD);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::SSD, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     for (auto &input_pad : op_proto.input_pads()) {
@@ -1168,7 +1285,7 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, op_type);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, op_type, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     for (auto &input_pad : op_proto.input_pads()) {
@@
-1239,7 +1356,8 @@ Expected> create_logits_op_metadata(const // TODO: HRT-10603 const auto &op_input_layer_info = pad_index_to_streams_info.at(output_pad_index); - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value( + DeviceBase::hef_arch_to_device_arch(static_cast(hef_arch))); CHECK_EXPECTED(max_periph_bytes_from_hef); // TODO HRT-12099 - return invalid hef error when remove support for hefs with no max_shmifo size @@ -1303,11 +1421,19 @@ Expected> Hef::Impl::create_ops_ net_flow::PostProcessOpMetadataPtr post_process_op_metadata; switch (op_proto.nms_op().nms_op_case()) { case ProtoHEFNmsOp::kYoloNmsOp: { - auto expected_post_process_op_metadata = create_yolov5_op_metadata(op_proto, pad_index_to_streams_info, + if (op_proto.nms_op().bbox_decoding_only()) { + auto expected_post_process_op_metadata = create_yolov5_bbox_only_op_metadata(op_proto, pad_index_to_streams_info, + input_to_output_pads, network_name); + CHECK_EXPECTED(expected_post_process_op_metadata); + post_process_op_metadata = expected_post_process_op_metadata.release(); + break; + } else { + auto expected_post_process_op_metadata = create_yolov5_op_metadata(op_proto, pad_index_to_streams_info, input_to_output_pads, network_name); - CHECK_EXPECTED(expected_post_process_op_metadata); - post_process_op_metadata = expected_post_process_op_metadata.release(); - break; + CHECK_EXPECTED(expected_post_process_op_metadata); + post_process_op_metadata = expected_post_process_op_metadata.release(); + break; + } } case ProtoHEFNmsOp::kYoloxNmsOp: { auto expected_post_process_op_metadata = create_yolox_op_metadata(op_proto, pad_index_to_streams_info, @@ -1791,7 +1917,7 @@ static hailo_3d_image_shape_t parse_layer_hw_shape(const ProtoHEFEdgeLayerBase & hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, - bool transposed, const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info, + bool transposed, const uint16_t context_index, const uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer) { auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format()); @@ -1826,7 +1952,8 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas CHECK_EXPECTED_AS_STATUS(type); layer_info.format.type = type.value(); - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + auto max_periph_bytes_from_hef = + HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(static_cast(hef_arch))); CHECK_EXPECTED_AS_STATUS(max_periph_bytes_from_hef); // TODO HRT-12099 - return invalid hef error when remove support for hefs with no max_shmifo size const auto max_periph_bytes = (0 == base_info.max_shmifo_size()) ? 
max_periph_bytes_from_hef.value() : @@ -1871,7 +1998,7 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas hailo_status HefUtils::fill_layer_info(const ProtoHEFEdgeLayerInfo &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer) { @@ -2005,7 +2132,7 @@ hailo_status HefUtils::fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, L hailo_status HefUtils::fill_mux_info(const ProtoHEFEdgeLayerMux &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) { if (HAILO_MAX_STREAM_NAME_SIZE < (info.name().length() + 1)) { @@ -2083,7 +2210,7 @@ Expected convert_planes_format_to_hailo_format_order(const hailo_status HefUtils::fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) { auto layer_type = get_layer_type(edge_connection_type); @@ -2146,7 +2273,7 @@ hailo_status HefUtils::fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, hailo_status HefUtils::fill_boundary_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata, @@ -2154,7 +2281,7 @@ hailo_status HefUtils::fill_boundary_layers_info( { auto layer_info = get_boundary_layer_info(core_op, context_index, layer, supported_features, hef_arch); CHECK_EXPECTED_AS_STATUS(layer_info); - + context_metadata.add_boundary_layer(layer_info.release()); return HAILO_SUCCESS; @@ -2162,7 +2289,7 @@ hailo_status HefUtils::fill_boundary_layers_info( hailo_status HefUtils::fill_inter_context_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata) @@ -2176,7 +2303,7 @@ hailo_status HefUtils::fill_inter_context_layers_info( hailo_status HefUtils::fill_ddr_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata) @@ -2191,7 +2318,7 @@ hailo_status HefUtils::fill_ddr_layers_info( hailo_status HefUtils::check_ddr_pairs_match( const std::vector &context_ddr_input_layers, const std::vector &context_ddr_output_layers, - const uint8_t 
context_index)
+    const uint16_t context_index)
 {
     CHECK(context_ddr_input_layers.size() == context_ddr_output_layers.size(), HAILO_INVALID_HEF,
         "DDR pairs must be equal in size for context {}", context_index);
@@ -2310,7 +2437,7 @@ static std::pair<uint32_t, uint32_t> old_hef_parse_initial_l3(uint32_t initial_l3
 }

 static Expected<ContextSwitchConfigActionPtr> parse_action(const ProtoHEFAction &proto_action,
-    const SupportedFeatures &supported_features)
+    const SupportedFeatures &supported_features, bool &const_input_layer_found)
 {
     switch (proto_action.action_case()) {
     case ProtoHEFAction::kDisableLcu:
@@ -2328,6 +2455,8 @@
             "Failed to parse HEF. Invalid lcu_index: {}.", proto_action.enable_lcu().lcu_index());
         CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(proto_action.enable_lcu().lcu_kernel_done_address()), HAILO_INVALID_HEF,
             "Failed to parse HEF. Invalid lcu_kernel_done_address: {}.", proto_action.enable_lcu().lcu_kernel_done_address());
+        CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(proto_action.enable_lcu().lcu_kernel_done_count()), HAILO_INVALID_HEF,
+            "Failed to parse HEF. Invalid lcu_kernel_done_count: {}.", proto_action.enable_lcu().lcu_kernel_done_count());

         auto support_multi_networks = supported_features.multi_network_support;
         auto network_index = static_cast<uint8_t>((support_multi_networks) ? proto_action.enable_lcu().network_index() : 0);
@@ -2340,6 +2469,24 @@
         return EnableLcuAction::create(cluster_index, lcu_index, network_index, kernel_done_address,
             kernel_done_count);
     }
+    case ProtoHEFAction::kSwitchLcuBatch:
+    {
+        CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.switch_lcu_batch().cluster_index()), HAILO_INVALID_HEF,
+            "Failed to parse HEF. Invalid cluster_index: {}.", proto_action.switch_lcu_batch().cluster_index());
+        CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.switch_lcu_batch().lcu_index()), HAILO_INVALID_HEF,
+            "Failed to parse HEF. Invalid lcu_index: {}.", proto_action.switch_lcu_batch().lcu_index());
+
+        auto support_multi_networks = supported_features.multi_network_support;
+        auto network_index = static_cast<uint8_t>((support_multi_networks) ? proto_action.switch_lcu_batch().network_index() : 0);
+
+        const auto cluster_index = static_cast<uint8_t>(proto_action.switch_lcu_batch().cluster_index());
+        const auto lcu_index = static_cast<uint8_t>(proto_action.switch_lcu_batch().lcu_index());
+        // The kernel_done_count field isn't used, but is required for legacy reasons.
+        const auto NULL_KERNEL_DONE_COUNT = (uint32_t)0;
+
+        return SwitchLcuBatchAction::create(cluster_index, lcu_index, network_index,
+            NULL_KERNEL_DONE_COUNT);
+    }
     case ProtoHEFAction::kEnableSequencer:
     {
         CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.enable_sequencer().cluster_index()), HAILO_INVALID_HEF,
@@ -2465,6 +2612,11 @@ static Expected<ContextSwitchConfigActionPtr> parse_action(const ProtoHEFAction
         const auto network_index = static_cast<uint8_t>((support_multi_networks) ? proto_action.write_data_by_type().network_index() : 0);
         const auto shift = static_cast<uint8_t>(proto_action.write_data_by_type().shift());

+        // If data_type is BATCH_SIZE, fast batch switch can't be used
+        if (ProtoHEFWriteDataType::BATCH_SIZE == data_type) {
+            const_input_layer_found = true;
+        }
+
         return WriteDataByTypeAction::create(address, data_type, data, shift, mask, network_index);
     }
     default:
@@ -2521,9 +2673,46 @@ static hailo_status merge_write_ccw_actions(
         config_buffer_infos[config_stream_index].emplace_back(static_cast<uint32_t>(config_buffer->size()));

         const size_t total_ccw_burst = ccw_buffers.size();
-        auto action = WriteDataCcwAction::create(config_buffer.release(), config_stream_index, total_ccw_burst);
+        auto action = WriteDataCcwActionByBuffer::create(config_buffer.release(), config_stream_index, total_ccw_burst);
+        CHECK_EXPECTED_AS_STATUS(action);
+
+        actions.emplace_back(action.release());
+    }
+
+    return HAILO_SUCCESS;
+}
+
+static hailo_status build_write_ccw_actions(
+    std::vector<ContextSwitchConfigActionPtr> &actions,
+    ConfigBufferInfoMap &config_buffer_infos,
+    const std::vector<const ProtoHEFActionWriteDataCcw*> &write_ccw_actions,
+    std::shared_ptr<ShefFileHandle> shef_file_handle)
+{
+    std::unordered_map<uint8_t, size_t> ccws_per_config_index;
+    for (const auto *write_ccw_action : write_ccw_actions) {
+        CHECK(IS_FIT_IN_UINT8(write_ccw_action->cfg_channel_index()), HAILO_INVALID_HEF,
+            "Invalid cfg channel index");
+        const auto config_stream_index = static_cast<uint8_t>(write_ccw_action->cfg_channel_index());
+        if (ccws_per_config_index.find(config_stream_index) == ccws_per_config_index.end()) {
+            ccws_per_config_index[config_stream_index] = 0;
+        }
+        ccws_per_config_index[config_stream_index]++;
+    }
+    for (const auto *write_ccw_action : write_ccw_actions) {
+        if (write_ccw_action->data().size() == 0) {
+            continue;
+        }
+        const shef__ccw_offset_t *ccw_offset = reinterpret_cast<const shef__ccw_offset_t*>(write_ccw_action->data().data());
+        const auto config_stream_index = static_cast<uint8_t>(write_ccw_action->cfg_channel_index());
+
+        assert(BYTE_ORDER__htonl(ccw_offset->size) < std::numeric_limits<uint32_t>::max());
+        config_buffer_infos[config_stream_index].emplace_back(static_cast<uint32_t>(BYTE_ORDER__htonl(ccw_offset->size)));
+
+        const size_t total_ccw_burst = ccws_per_config_index[config_stream_index];
+        auto action = WriteDataCcwAction::create(BYTE_ORDER__htonl(ccw_offset->offset), BYTE_ORDER__htonl(ccw_offset->size),
+            config_stream_index, total_ccw_burst, shef_file_handle);
         CHECK_EXPECTED_AS_STATUS(action);
+
         actions.emplace_back(action.release());
     }
@@ -2533,7 +2722,9 @@
 static hailo_status parse_operation(std::vector<ContextSwitchConfigActionPtr> &actions,
     ConfigBufferInfoMap &config_buffer_infos,
     const ProtoHEFOperation &operation_proto,
-    const SupportedFeatures &supported_features)
+    const SupportedFeatures &supported_features,
+    std::shared_ptr<ShefFileHandle> shef_file_handle,
+    bool &const_input_layer_found)
 {
     auto trigger_action = parse_trigger_action(operation_proto.trigger());
     CHECK_EXPECTED_AS_STATUS(trigger_action);
@@ -2554,12 +2745,17 @@
             (next_action_index == operation_proto.actions_size()) ||
             (operation_proto.actions(next_action_index).action_case() != ProtoHEFAction::kWriteDataCcw);
         if (is_last_ccw) {
-            auto status = merge_write_ccw_actions(actions, config_buffer_infos, current_write_ccw_actions);
-            CHECK_SUCCESS(status);
+            if (nullptr != shef_file_handle) {
+                auto status = build_write_ccw_actions(actions, config_buffer_infos, current_write_ccw_actions, shef_file_handle);
+                CHECK_SUCCESS(status);
+            } else {
+                auto status = merge_write_ccw_actions(actions, config_buffer_infos, current_write_ccw_actions);
+                CHECK_SUCCESS(status);
+            }
             current_write_ccw_actions.clear();
         }
     } else {
-        auto action = parse_action(proto_action, supported_features);
+        auto action = parse_action(proto_action, supported_features, const_input_layer_found);
         CHECK_EXPECTED_AS_STATUS(action);
         actions.emplace_back(action.release());
     }
@@ -2571,30 +2767,32 @@
 static Expected<ContextMetadata> parse_operations(
     const google::protobuf::RepeatedPtrField<ProtoHEFOperation> &operations_proto,
-    const SupportedFeatures &supported_features)
+    const SupportedFeatures &supported_features, std::shared_ptr<ShefFileHandle> shef_file_handle)
 {
     std::vector<ContextSwitchConfigActionPtr> actions;
     ConfigBufferInfoMap config_buffer_infos;
+    bool const_input_layer_found = false;

     for (const auto &operation_proto : operations_proto) {
-        auto status = parse_operation(actions, config_buffer_infos, operation_proto, supported_features);
+        auto status = parse_operation(actions, config_buffer_infos, operation_proto, supported_features, shef_file_handle,
+            const_input_layer_found);
         CHECK_SUCCESS_AS_EXPECTED(status);
     }

-    return ContextMetadata(std::move(actions), std::move(config_buffer_infos));
+    return ContextMetadata(std::move(actions), std::move(config_buffer_infos), const_input_layer_found);
 }

 Expected<ContextMetadata> HefUtils::parse_preliminary_context(const ProtoHEFPreliminaryConfig &preliminary_proto,
-    const SupportedFeatures &supported_features)
+    const SupportedFeatures &supported_features, std::shared_ptr<ShefFileHandle> shef_file_handle)
 {
-    return parse_operations(preliminary_proto.operation(), supported_features);
+    return parse_operations(preliminary_proto.operation(), supported_features, shef_file_handle);
 }

 Expected<ContextMetadata> HefUtils::parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op,
-    const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features,
-    const ProtoHEFHwArch &hef_arch)
+    const ProtoHEFContext &context_proto, uint16_t context_index, const SupportedFeatures &supported_features,
+    const ProtoHEFHwArch &hef_arch, std::shared_ptr<ShefFileHandle> shef_file_handle)
 {
-    auto context_metadata_exp = parse_operations(context_proto.operations(), supported_features);
+    auto context_metadata_exp = parse_operations(context_proto.operations(), supported_features, shef_file_handle);
     CHECK_EXPECTED(context_metadata_exp);

     ContextMetadata context_metadata = context_metadata_exp.release();
@@ -2644,12 +2842,13 @@ static hailo_status validate_unique_boundary_names(const std::vector
 Expected<std::vector<ContextMetadata>> HefUtils::parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, const SupportedFeatures &supported_features,
-    const ProtoHEFHwArch &hef_arch)
+    const ProtoHEFHwArch &hef_arch, std::shared_ptr<ShefFileHandle> shef_file_handle)
 {
     std::vector<ContextMetadata> contexts_metadata;
-    for (uint8_t context_index = 0; context_index < core_op.contexts.size(); context_index++) {
+    for (uint16_t context_index = 0; context_index < core_op.contexts.size(); context_index++) {
         auto &context_proto = core_op.contexts[context_index];
-        auto context_metadata = parse_single_dynamic_context(core_op, context_proto, context_index, supported_features, hef_arch);
+        auto context_metadata = parse_single_dynamic_context(core_op, context_proto, context_index, supported_features,
+            hef_arch, shef_file_handle);
         CHECK_EXPECTED(context_metadata);
         contexts_metadata.emplace_back(context_metadata.release());
     }
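Under the SHEF path above, each kWriteDataCcw payload carries a big-endian shef__ccw_offset_t record instead of the CCW bytes themselves. A small decoding sketch under that assumption (the helper name is hypothetical; it mirrors what build_write_ccw_actions does):

// Decodes one (offset, size) record the same way build_write_ccw_actions does.
// action_data is the protobuf bytes field of a kWriteDataCcw action.
static std::pair<uint32_t, uint32_t> decode_ccw_record(const std::string &action_data)
{
    const auto *record = reinterpret_cast<const shef__ccw_offset_t*>(action_data.data());
    // Stored big-endian; BYTE_ORDER__htonl swaps to host order.
    return std::make_pair(BYTE_ORDER__htonl(record->offset), BYTE_ORDER__htonl(record->size));
}
// The decoded offset is relative to the CCW section; ShefFileHandle adds its
// m_ccws_buffer_offset before seeking, so callers never deal with absolute file offsets.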
@@ -2772,7 +2971,7 @@ Expected<hailo_nms_info_t> HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo
 }

 Expected<LayerInfo> HefUtils::get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op,
-    const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features,
+    const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features,
     const ProtoHEFHwArch &hef_arch)
 {
     // We parse only boundary layers for user usage
@@ -2826,18 +3025,18 @@ static Expected<ConnectedContextInfo> parse_connected_context_info(
         "Failed to parse HEF. Invalid connected_sys_index: {}.", connected_context_proto.sys_index());
     CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(connected_context_proto.engine_id()), HAILO_INVALID_HEF,
         "Failed to parse HEF. Invalid engine_id: {}. in connected_contexts", connected_context_proto.engine_id());
-    CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(connected_context_proto.index()), HAILO_INVALID_HEF,
+    CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(connected_context_proto.index()), HAILO_INVALID_HEF,
         "Failed to parse HEF. Invalid connected_context_index: {}.", connected_context_proto.index());

     ConnectedContextInfo connected_context{};
-    connected_context.context_index = static_cast<uint8_t>(connected_context_proto.index());
+    connected_context.context_index = static_cast<uint16_t>(connected_context_proto.index());
     connected_context.stream_index = static_cast<uint8_t>(connected_context_proto.sys_index());
     connected_context.dma_engine_index = static_cast<uint8_t>(connected_context_proto.engine_id());
     return connected_context;
 }

 Expected<LayerInfo> HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpMock &core_op,
-    const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
+    const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
 {
     LayerInfo result = {};
     CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "Inter-context layer can't be mux.");
@@ -2893,7 +3092,7 @@
 }

 Expected<LayerInfo> HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_op,
-    const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
+    const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
 {
     LayerInfo result = {};
     CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "DDR layer can't be mux.");
@@ -3075,7 +3274,7 @@ Expected<std::vector<WriteMemoryInfo>> Hef::Impl::create_single_context_core_op_config(
         break;
     }
     case ProtoHEFAction::kWriteDataCcw: {
-        auto config_buffer = parse_ccw_buffer(action.write_data_ccw().data());
+        auto config_buffer = parse_ccw_buffer(action.write_data_ccw().data()); // TODO: make this unsupported for sHEF
         CHECK_EXPECTED(config_buffer);
         config_buffers.emplace_back(config_buffer.release());
         break;
     }
@@ -3093,6 +3292,12 @@ Expected<std::vector<WriteMemoryInfo>> Hef::Impl::create_single_context_core_op_config(
         config_buffers.emplace_back(std::move(write_memory_info));
         break;
     }
+    case ProtoHEFAction::kSwitchLcuBatch: {
+        LOGGER__ERROR("Parsing error. Context-switch optimization related actions are not supported over Ethernet. "
+            "If you use the Ethernet interface, please disable context-switch optimizations in the Dataflow Compiler (SDK) and then re-create the HEF. "
+            "See the Dataflow Compiler user guide for more information.");
+        return make_unexpected(HAILO_INVALID_HEF);
+    }
     case ProtoHEFAction::kAllowInputDataflow: {
     case ProtoHEFAction::kWaitForModuleConfigDone:
         // We ignore the 'wait_for_interrupt' actions. After writing the configurations we can be sure everything is configured and don't need to wait for interrupts
@@ -3122,6 +3327,11 @@ ProtoHEFHwArch Hef::Impl::get_device_arch()
     return m_header.hw_arch();
 }

+std::shared_ptr<ShefFileHandle> Hef::Impl::get_shef_file_handle()
+{
+    return m_shef_file_handle;
+}
+
 Expected<float64_t> Hef::Impl::get_bottleneck_fps(const std::string &net_group_name)
 {
     auto core_op = get_core_op_by_net_group_name(net_group_name);
diff --git a/hailort/libhailort/src/hef/hef_internal.hpp b/hailort/libhailort/src/hef/hef_internal.hpp
index b1c40020..7e6d91ab 100644
--- a/hailort/libhailort/src/hef/hef_internal.hpp
+++ b/hailort/libhailort/src/hef/hef_internal.hpp
@@ -31,19 +31,19 @@
 #include "hailo/hef.hpp"
 #include "hailo/network_group.hpp"
 #include "hailo/hailort_defaults.hpp"
-#include "net_flow/ops/op_metadata.hpp"
+#include "net_flow/ops_metadata/op_metadata.hpp"

 #include "hef/core_op_metadata.hpp"
 #include "hef/layer_info.hpp"
 #include "hef/context_switch_actions.hpp"
 #include "net_flow/ops/op.hpp"
-#include "net_flow/pipeline/pipeline_internal.hpp"
 #include "device_common/control_protocol.hpp"
 #include "control_protocol.h"

 #include
 #include
 #include
+#include <fstream>

 extern "C" {
 #include "md5.h"
@@ -53,8 +53,6 @@
 namespace hailort
 {

-#define DEFAULT_NMS_NO_BURST_SIZE (1)
-
 class CoreOpMetadata;
 class CoreOp;
 using ProtoHEFNetworkGroupPtr = std::shared_ptr<ProtoHEFNetworkGroup>;
@@ -116,12 +114,19 @@ struct ProtoHEFCoreOpMock {
 typedef struct {
     uint32_t magic;
     uint32_t version;
-    uint32_t hef_proto_length;
-    uint32_t reserved;
+    uint32_t hef_proto_size;
+    uint32_t ccws_size;
     MD5_SUM_t expected_md5;
 } hef__header_t;
 #pragma pack(pop)

+#pragma pack(push, 1)
+typedef struct {
+    uint32_t offset;
+    uint32_t size;
+} shef__ccw_offset_t;
+#pragma pack(pop)
+
 typedef enum {
     HEF__FORMAT__TF_RGB = 0,
     HEF__FORMAT__FRAMES,
@@ -169,7 +174,9 @@ static const std::vector<ProtoHEFExtensionType> SUPPORTED_EXTENSIONS = {
     HAILO_NET_FLOW_YOLOV5_SEG_NMS,  // Extension added in platform 4.15 release
     HAILO_NET_FLOW_IOU_NMS,         // Extension added in platform 4.15 release
     HW_PADDING,                     // Extension added in platform 4.16 release
-    HAILO_NET_FLOW_YOLOV8_NMS       // Extension added in platform 4.16 release
+    HAILO_NET_FLOW_YOLOV8_NMS,      // Extension added in platform 4.16 release
+    BATCH_REGISTER_CONFIG,          // Extension added in platform 4.17 release
+    HAILO_NET_FLOW_BBOX_DECODING    // Extension added in platform 4.18 release
 };

 static inline bool is_h2d_boundary_info_layer(const ProtoHEFEdgeLayer& layer)
@@ -223,12 +230,26 @@
 class VdmaConfigCoreOp;
 class VdmaDevice;
 class HailoRTDriver;

+class ShefFileHandle final
+{
+public:
+    ShefFileHandle(const std::string &hef_path, uint32_t ccws_buffer_offset);
+    hailo_status open();
+    Expected<Buffer> read(uint32_t offset, size_t size);
+    hailo_status close();
+
+private:
+    std::string m_hef_path;
+    std::ifstream m_hef_file;
+    uint32_t m_ccws_buffer_offset;
+};
+
 class Hef::Impl final
 {
 public:
     static const uint32_t HEADER_MAGIC = 0x01484546;
-    static const uint32_t HEADER_VERSION = 0;
+    static const uint32_t HEADER_VERSION_0 = 0; // Old HEF
+    static const uint32_t HEADER_VERSION_1 = 1; // New HEF (SHEF)

     static Expected<Hef::Impl> create(const std::string &hef_path);
     static Expected<Hef::Impl> create(const MemoryView &hef_buffer);
@@ -239,6 +260,8 @@ class Hef::Impl final
     Expected<std::pair<std::string, std::string>> get_network_group_and_network_name(const std::string &name);

+    void clear_hef_buffer();
+
     Expected<std::shared_ptr<ProtoHEFCoreOpMock>> get_core_op_by_net_group_name(const std::string &net_group_name="");

     Expected<std::vector<hailo_network_info_t>> get_network_infos(const std::string &net_group_name="");
@@ -261,6 +284,7 @@ class Hef::Impl final
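For orientation, a minimal usage sketch of the ShefFileHandle declared above, following the open/read/close semantics implemented in hef.cpp (ccw_offset and ccw_size are hypothetical values; real callers take them from shef__ccw_offset_t records):

// Illustrative sketch only - error handling reduced to the status/Expected idiom used above.
ShefFileHandle handle(hef_path, ccws_buffer_offset); // offset = sizeof(header) + hef_proto_size
auto status = handle.open();                         // opens the .hef in binary mode
auto ccw = handle.read(ccw_offset, ccw_size);        // Expected<Buffer>; seeks to
                                                     // m_ccws_buffer_offset + ccw_offset first
status = handle.close();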
Expected get_number_of_input_streams(const std::string &net_group_name=""); Expected get_number_of_output_streams(const std::string &net_group_name=""); ProtoHEFHwArch get_device_arch(); + std::shared_ptr get_shef_file_handle(); Expected get_bottleneck_fps(const std::string &net_group_name=""); static bool contains_ddr_layers(const ProtoHEFCoreOpMock &core_op); static hailo_status validate_core_op_unique_layer_names(const ProtoHEFCoreOpMock &core_op); @@ -416,6 +440,7 @@ class Hef::Impl final std::vector m_hef_optional_extensions; std::bitset m_supported_extensions_bitset; MD5_SUM_t m_md5; + std::shared_ptr m_shef_file_handle; #ifdef HAILO_SUPPORT_MULTI_PROCESS Buffer m_hef_buffer; @@ -450,46 +475,46 @@ class HefUtils final static hailo_status fill_boundary_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch); static Expected get_inter_context_layer_info( - const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, + const ProtoHEFCoreOpMock &core_op, const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); static hailo_status fill_inter_context_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata); static Expected get_ddr_layer_info( - const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, + const ProtoHEFCoreOpMock &core_op, const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); static hailo_status fill_ddr_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata); static hailo_status check_ddr_pairs_match( const std::vector &context_ddr_input_layers, const std::vector &context_ddr_output_layers, - const uint8_t context_index); + const uint16_t context_index); static Expected parse_preliminary_context(const ProtoHEFPreliminaryConfig &preliminary_proto, - const SupportedFeatures &supported_features); + const SupportedFeatures &supported_features, std::shared_ptr shef_file_handle); static Expected parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op, - const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features, - const ProtoHEFHwArch &hef_arch); + const ProtoHEFContext &context_proto, uint16_t context_index, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch, std::shared_ptr shef_file_handle); static Expected> parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, - const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); + const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, std::shared_ptr shef_file_handle); static Expected parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info, const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch); static Expected get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures 
&supported_features, const ProtoHEFHwArch &hef_arch); - + static Expected get_partial_network_name_by_index(const ProtoHEFCoreOpMock &core_op, uint8_t network_index, const SupportedFeatures &supported_features); static std::string get_network_group_name(const ProtoHEFNetworkGroup &net_group, const SupportedFeatures &supported_features); @@ -500,12 +525,12 @@ class HefUtils final // TODO HRT-12051: Remove is_part_of_mux_layer parameter when core_hw_padding is removed static hailo_status fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, - bool transposed, const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info, + bool transposed, const uint16_t context_index, const uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer); // TODO HRT-12051: Remove is_part_of_mux_layer parameter when core_hw_padding is removed static hailo_status fill_layer_info(const ProtoHEFEdgeLayerInfo &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer); static hailo_status fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, @@ -513,12 +538,12 @@ class HefUtils final const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch); static hailo_status fill_mux_info(const ProtoHEFEdgeLayerMux &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); static hailo_status fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); }; diff --git a/hailort/libhailort/src/hef/layer_info.hpp b/hailort/libhailort/src/hef/layer_info.hpp index 385e86ea..5e996be7 100644 --- a/hailort/libhailort/src/hef/layer_info.hpp +++ b/hailort/libhailort/src/hef/layer_info.hpp @@ -14,7 +14,7 @@ #include "hailo/hailort_common.hpp" #include "hailo/hailort_defaults.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "control_protocol.h" #include @@ -45,7 +45,7 @@ struct BufferIndices { }; struct ConnectedContextInfo { - uint8_t context_index; + uint16_t context_index; uint8_t dma_engine_index; uint8_t stream_index; }; @@ -67,7 +67,7 @@ struct LayerInfo { uint8_t network_index; CONTROL_PROTOCOL__nn_stream_config_t nn_stream_config; uint32_t 
max_shmifo_size; - uint8_t context_index; + uint16_t context_index; uint32_t pad_index = INVALID_PAD_INDEX; // Transformation and shape info diff --git a/hailort/libhailort/src/hw_consts.hpp b/hailort/libhailort/src/hw_consts.hpp index c576eed2..59dfe54b 100644 --- a/hailort/libhailort/src/hw_consts.hpp +++ b/hailort/libhailort/src/hw_consts.hpp @@ -19,12 +19,6 @@ // Max periph bytes per buffer for hailo1x because (we use its value shifted right by 3 - according to the spec) to // configure shmifo credit size - which in hailo15 only has a width of 10 bits #define HAILO1X_PERIPH_BYTES_PER_BUFFER_MAX_SIZE (0x00002000L) -#define HAILO1X_PERIPH_PAYLOAD_MAX_VALUE (0x007FFFFFL) - - -/** Vdma Channel registers ***************************************************/ -#define VDMA_CHANNEL_CONTROL_OFFSET (0x00) -#define VDMA_CHANNEL_NUM_AVAIL_OFFSET (0x02) - +#define HAILO1X_PERIPH_PAYLOAD_MAX_VALUE (0x01FFFFFFL) #endif /* _HAILO_HW_CONSTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/CMakeLists.txt b/hailort/libhailort/src/net_flow/CMakeLists.txt index c49a12b3..8c9f7a65 100644 --- a/hailort/libhailort/src/net_flow/CMakeLists.txt +++ b/hailort/libhailort/src/net_flow/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ops/nms_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolov5_post_process.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolov5_bbox_only_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolox_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/ssd_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/argmax_post_process.cpp @@ -11,12 +12,19 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolov8_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline_builder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/inference_pipeline.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline_internal.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/filter_elements.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/queue_elements.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/edge_elements.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/multi_io_elements.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/async_pipeline_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/async_infer_runner.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/infer_model.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/inference_pipeline.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp b/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp index 3d753213..d5cdaf91 100644 --- a/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp @@ -131,6 +131,38 @@ ArgmaxFunction ArgmaxPostProcessOp::m_argmax_function_array[ARGMAX_NUM_OF_POSSIB ArgmaxPostProcessOp::execute_not_supported, ArgmaxPostProcessOp::execute_not_supported } + }, + { + { + // F8CR x AUTO + // We don't support input_format_type to be auto + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported + }, + { + // F8CR x UINT8 + ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto + 
ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+        },
+        {
+            // F8CR x UINT16
+            ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+        },
+        {
+            // F8CR x FLOAT32
+            // We don't support input_format_type to be float32
+            ArgmaxPostProcessOp::execute_not_supported,
+            ArgmaxPostProcessOp::execute_not_supported,
+            ArgmaxPostProcessOp::execute_not_supported,
+            ArgmaxPostProcessOp::execute_not_supported
+        }
     }
 };
@@ -155,6 +187,9 @@ hailo_status ArgmaxPostProcessOp::execute(const std::map
@@ -27,39 +27,13 @@ namespace hailort
 namespace net_flow
 {

-#define ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (3)
+#define ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (4)
 #define ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES (4)
-
-constexpr std::size_t ARGMAX_OUTPUT_FEATURES_SIZE {1};
-constexpr std::size_t ARGMAX_NUMBER_OF_SRCS {1};
-constexpr std::size_t ARGMAX_NUMBER_OF_DSTS {1};
+#define F8CR_FEATURES_IN_CHUNK (8)

 typedef hailo_status (*ArgmaxFunction)(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
     const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs);
-
-class ArgmaxOpMetadata : public OpMetadata
-{
-public:
-    static Expected<std::shared_ptr<OpMetadata>> create(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata,
-        const std::unordered_map<std::string, BufferMetaData> &outputs_metadata,
-        const std::string &network_name);
-    std::string get_op_description() override;
-    hailo_status validate_format_info() override;
-    static hailo_format_t expand_output_format_autos(const hailo_format_t &output_format, const hailo_format_t &input_format);
-
-    virtual Expected<hailo_vstream_info_t> get_output_vstream_info() override;
-
-private:
-    ArgmaxOpMetadata(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata,
-        const std::unordered_map<std::string, BufferMetaData> &outputs_metadata,
-        const std::string &network_name)
-        : OpMetadata(inputs_metadata, outputs_metadata, "Argmax-Post-Process", network_name, OperationType::ARGMAX)
-    {}
-
-    hailo_status validate_params() override;
-};
-
 class ArgmaxPostProcessOp : public Op
 {
@@ -148,6 +122,41 @@ class ArgmaxPostProcessOp : public Op
         return HAILO_SUCCESS;
     }

+    template <typename SrcType, typename DstType>
+    static hailo_status F8CR_to_NHW_feature_axis(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+        const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+    {
+        auto src_ptr = (SrcType*)inputs.begin()->second.data();
+        auto dst_ptr = (DstType*)outputs.begin()->second.data();
+        const auto src_row_size = input_metadata.padded_shape.width * input_metadata.padded_shape.features;
+        const auto dst_row_size = output_metadata.shape.width;
+        const auto num_of_eight_channels_chunks = input_metadata.padded_shape.features / F8CR_FEATURES_IN_CHUNK;
+        const auto eight_channels_x_width_size = input_metadata.padded_shape.width * F8CR_FEATURES_IN_CHUNK;
+
+        for (uint32_t r = 0; r < input_metadata.shape.height; r++) {
+            const SrcType *src_row = src_ptr + (r * src_row_size);
+            DstType *dst_row = dst_ptr + (r * dst_row_size);
+            for (uint32_t w = 0; w < input_metadata.shape.width; w++) {
+                const SrcType *offset_in_row = src_row + (w * F8CR_FEATURES_IN_CHUNK);
+                DstType max_index = 0;
+                auto max_value = *offset_in_row;
+                for (uint32_t channel_chunk_id = 0; channel_chunk_id < num_of_eight_channels_chunks; channel_chunk_id++) {
+                    const SrcType *offset_in_column = offset_in_row + (eight_channels_x_width_size * channel_chunk_id);
+                    uint32_t num_of_channels_in_chunk = ((channel_chunk_id + 1 == num_of_eight_channels_chunks) ?
+                        (input_metadata.shape.features - (channel_chunk_id * F8CR_FEATURES_IN_CHUNK)) : F8CR_FEATURES_IN_CHUNK);
+                    for (uint32_t c = 0; c < num_of_channels_in_chunk; c++) {
+                        const auto &current_value = *(offset_in_column + c);
+                        if (current_value > max_value) {
+                            max_index = static_cast<DstType>(c + F8CR_FEATURES_IN_CHUNK * channel_chunk_id);
+                            max_value = current_value;
+                        }
+                    }
+                }
+                dst_row[w] = max_index;
+            }
+        }
+        return HAILO_SUCCESS;
+    }
+
     static hailo_status execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
         const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs);
diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
index 38bd2713..735f2b20 100644
--- a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
@@ -10,7 +10,6 @@
 **/

 #include "net_flow/ops/nms_post_process.hpp"
-#include "hef/hef_internal.hpp"

 namespace hailort
 {
@@ -193,7 +192,7 @@ hailo_status NmsPostProcessOp::hailo_nms_format(MemoryView dst_view)
     return HAILO_SUCCESS;
 }

-hailo_format_t NmsOpMetadata::expand_output_format_autos_by_op_type(const hailo_format_t &output_format, OperationType type)
+hailo_format_t NmsOpMetadata::expand_output_format_autos_by_op_type(const hailo_format_t &output_format, OperationType type, bool bbox_only)
 {
     auto format = output_format;
@@ -201,6 +200,8 @@
     {
         if (OperationType::YOLOV5SEG == type) {
             format.order = HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK;
+        } else if (bbox_only) {
+            format.order = HAILO_FORMAT_ORDER_NHWC;
         } else {
             format.order = HAILO_FORMAT_ORDER_HAILO_NMS;
         }
@@ -256,4 +257,4 @@ hailo_nms_info_t NmsOpMetadata::nms_info()
 }

 }
-} \ No newline at end of file
+}
diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp
index ae2623b2..e37b2a9c 100644
--- a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp
+++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp
@@ -3,8 +3,8 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file op.hpp
- * @brief Net-Flow op
+ * @file nms_post_process.hpp
+ * @brief NMS op
 *
 * https://learnopencv.com/object-detection-using-yolov5-and-opencv-dnn-in-c-and-python :
 * The headline '4.3.5 POST-PROCESSING YOLOv5 Prediction Output' contains explanations on the YOLOv5 post-process.
@@ -22,6 +22,7 @@
 #include "common/logger_macros.hpp"

 #include "net_flow/ops/op.hpp"
+#include "net_flow/ops_metadata/nms_op_metadata.hpp"

 namespace hailort
@@ -95,70 +96,6 @@ inline bool operator==(const DetectionBbox &first, const DetectionBbox &second)
     return first.m_class_id == second.m_class_id && first.m_bbox == second.m_bbox;
 }

-struct NmsPostProcessConfig
-{
-    // User given confidence threshold for a bbox. A bbox will be consider as detection if the
-    // (objectness * class_score) is higher then the confidence_threshold.
-    double nms_score_th = 0;
-
-    // User given IoU threshold (intersection over union). This threshold is for performing
-    // Non-maximum suppression (Removing overlapping boxes).
-    double nms_iou_th = 0;
-
-    // Maximum amount of bboxes per nms class.
-    uint32_t max_proposals_per_class = 0;
-
-    // The model's number of classes. (This depends on the dataset that the model trained on). 
- uint32_t number_of_classes = 0; - - // Toggle background class removal from results - bool background_removal = false; - - // Index of background class for background removal - uint32_t background_removal_index = 0; - - // Indicates whether or not NMS performs IoU over different classes for the same box. - // If set to false - NMS won't intersect different classes, and a box could have multiple labels. - bool cross_classes = false; -}; - -static const float32_t REMOVED_CLASS_SCORE = 0.0f; - -class NmsOpMetadata : public OpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const std::string &network_name, - const OperationType type, - const std::string &name); - virtual ~NmsOpMetadata() = default; - std::string get_nms_config_description(); - hailo_status validate_format_info() override; - NmsPostProcessConfig &nms_config() { return m_nms_config;}; - hailo_nms_info_t nms_info(); - std::string get_op_description() override; - static hailo_format_t expand_output_format_autos_by_op_type(const hailo_format_t &output_format, OperationType type); - - virtual Expected get_output_vstream_info() override; - -protected: - NmsOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const std::string &name, - const std::string &network_name, - const OperationType type) - : OpMetadata(inputs_metadata, outputs_metadata, name, network_name, type), - m_nms_config(nms_post_process_config) - {} - - hailo_status validate_params() override; - -private: - NmsPostProcessConfig m_nms_config; -}; class NmsPostProcessOp : public Op { diff --git a/hailort/libhailort/src/net_flow/ops/op.hpp b/hailort/libhailort/src/net_flow/ops/op.hpp index 2d94e7a7..0b958088 100644 --- a/hailort/libhailort/src/net_flow/ops/op.hpp +++ b/hailort/libhailort/src/net_flow/ops/op.hpp @@ -16,7 +16,7 @@ #include "hailo/hailort.h" #include "hailo/buffer.hpp" #include "hailo/network_group.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/op_metadata.hpp" #include "common/utils.hpp" #include "common/logger_macros.hpp" @@ -73,4 +73,4 @@ class Op } } -#endif // _HAILO_NET_FLOW_OP_HPP_ \ No newline at end of file +#endif // _HAILO_NET_FLOW_OP_HPP_ diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp index 8e4e3411..a94a54ec 100644 --- a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp @@ -15,13 +15,11 @@ #include "hailo/hailort.h" #include "net_flow/ops/op.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/softmax_op_metadata.hpp" #include "common/utils.hpp" #include "hailo/quantization.hpp" -#include - namespace hailort { namespace net_flow @@ -30,34 +28,9 @@ namespace net_flow #define SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (2) // NHWC, NC #define SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES (4) // Auto, UINT8, UINT16, FLOAT32 -constexpr std::size_t SOFTMAX_NUMBER_OF_SRCS {1}; -constexpr std::size_t SOFTMAX_NUMBER_OF_DSTS {1}; - typedef hailo_status (*SoftmaxFunction)(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, const std::map &inputs, std::map &outputs); -class SoftmaxOpMetadata : public OpMetadata -{ -public: - static Expected> create(const std::unordered_map 
&inputs_metadata, - const std::unordered_map &outputs_metadata, - const std::string &network_name); - std::string get_op_description() override; - hailo_status validate_format_info() override; - static hailo_format_t expand_output_format_autos(const hailo_format_t &output_format, const hailo_format_t &input_format); - - virtual Expected get_output_vstream_info() override; - -private: - SoftmaxOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const std::string &network_name) - : OpMetadata(inputs_metadata, outputs_metadata, "Softmax-Post-Process", network_name, OperationType::SOFTMAX) - {} - - hailo_status validate_params() override; -}; - class SoftmaxPostProcessOp : public Op { diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp index 1d1f1309..82bb735f 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp @@ -39,6 +39,8 @@ hailo_status SSDOpMetadata::validate_params() return status; } + CHECK(!nms_config().bbox_only, HAILO_INVALID_ARGUMENT, "SSDPostProcessOp: bbox_only is not supported for SSD model"); + // Validate each anchor is mapped by reg and cls inputs for (const auto ®_to_cls_name : m_ssd_config.reg_to_cls_inputs) { CHECK(m_ssd_config.anchors.count(reg_to_cls_name.first), HAILO_INVALID_ARGUMENT, @@ -90,9 +92,9 @@ hailo_status SSDOpMetadata::validate_format_info() std::string SSDOpMetadata::get_op_description() { auto nms_config_info = get_nms_config_description(); - auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}, Centers scales factor: {}, " + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}, Centers scales factor: {}, " "Bbox dimension scale factor: {}, Normalize boxes: {}", OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, - m_ssd_config.image_height, m_ssd_config.image_width, m_ssd_config.centers_scale_factor, m_ssd_config.bbox_dimensions_scale_factor, + static_cast(m_ssd_config.image_height), static_cast(m_ssd_config.image_width), m_ssd_config.centers_scale_factor, m_ssd_config.bbox_dimensions_scale_factor, m_ssd_config.normalize_boxes); return config_info; } @@ -233,4 +235,4 @@ hailo_status SSDPostProcessOp::extract_detections(const std::string ®_input_n } } -} \ No newline at end of file +} diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp index ba1331e2..25d6077e 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp @@ -13,69 +13,13 @@ #define _HAILO_SSD_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/ssd_op_metadata.hpp" namespace hailort { namespace net_flow { -struct SSDPostProcessConfig -{ - // The image height. - float32_t image_height = 0; - - // The image width. - float32_t image_width = 0; - - uint32_t centers_scale_factor = 0; - - uint32_t bbox_dimensions_scale_factor = 0; - - uint32_t ty_index = 0; - uint32_t tx_index = 0; - uint32_t th_index = 0; - uint32_t tw_index = 0; - - std::map reg_to_cls_inputs; - - // A vector of anchors, each element in the vector represents the anchors for a specific layer - // Each layer anchors vector is structured as {w,h} pairs. - // Each anchor is mapped by 2 keys: - // 1. 
reg input - // 2. cls input - std::map> anchors; - - // Indicates whether boxes should be normalized (and clipped) - bool normalize_boxes = false; -}; - -class SSDOpMetadata : public NmsOpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const SSDPostProcessConfig &ssd_post_process_config, - const std::string &network_name); - std::string get_op_description() override; - hailo_status validate_format_info() override; - SSDPostProcessConfig &ssd_config() { return m_ssd_config;}; - -private: - SSDPostProcessConfig m_ssd_config; - SSDOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const SSDPostProcessConfig &ssd_post_process_config, - const std::string &network_name) - : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "SSD-Post-Process", network_name, OperationType::SSD) - , m_ssd_config(ssd_post_process_config) - {} - - hailo_status validate_params() override; -}; - class SSDPostProcessOp : public NmsPostProcessOp { diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp new file mode 100644 index 00000000..d03dc185 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov5_bbox_only_post_process.cpp + * @brief YOLOv5 bbox only post process + * + **/ + +#include "net_flow/ops/yolov5_bbox_only_post_process.hpp" + +namespace hailort +{ +namespace net_flow +{ + +Expected> YOLOv5BboxOnlyPostProcessOp::create(std::shared_ptr metadata) +{ + auto status = metadata->validate_format_info(); + CHECK_SUCCESS_AS_EXPECTED(status); + + auto op = std::shared_ptr(new (std::nothrow) YOLOv5BboxOnlyPostProcessOp(metadata)); + CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(op)); +} + +Expected Yolov5BboxOnlyOpMetadata::get_output_vstream_info() +{ + auto vstream_info = NmsOpMetadata::get_output_vstream_info(); + CHECK_EXPECTED(vstream_info); + + vstream_info->shape = m_outputs_metadata.begin()->second.shape; + return vstream_info.release(); +} + +hailo_status Yolov5BboxOnlyOpMetadata::validate_format_info() +{ + for (const auto& output_metadata : m_outputs_metadata) { + + CHECK(HAILO_FORMAT_TYPE_FLOAT32 == output_metadata.second.format.type, HAILO_INVALID_ARGUMENT, "The given output format type {} is not supported, " + "should be HAILO_FORMAT_TYPE_FLOAT32", HailoRTCommon::get_format_type_str(output_metadata.second.format.type)); + + CHECK(HAILO_FORMAT_ORDER_NHWC == output_metadata.second.format.order, HAILO_INVALID_ARGUMENT, "The given output format order {} is not supported, " + "should be HAILO_FORMAT_ORDER_NHWC", HailoRTCommon::get_format_order_str(output_metadata.second.format.order)); + + CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as transposed, which is not supported for this model.", + output_metadata.first); + CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.", + 
output_metadata.first); + } + + assert(1 <= m_inputs_metadata.size()); + const hailo_format_type_t& first_input_type = m_inputs_metadata.begin()->second.format.type; + for (const auto& input_metadata : m_inputs_metadata) { + CHECK(HAILO_FORMAT_ORDER_NHCW == input_metadata.second.format.order, HAILO_INVALID_ARGUMENT, "The given input format order {} is not supported, " + "should be HAILO_FORMAT_ORDER_NHCW", HailoRTCommon::get_format_order_str(input_metadata.second.format.order)); + + CHECK((HAILO_FORMAT_TYPE_UINT8 == input_metadata.second.format.type) || + (HAILO_FORMAT_TYPE_UINT16 == input_metadata.second.format.type), + HAILO_INVALID_ARGUMENT, "The given input format type {} is not supported, should be HAILO_FORMAT_TYPE_UINT8 or HAILO_FORMAT_TYPE_UINT16", + HailoRTCommon::get_format_type_str(input_metadata.second.format.type)); + + CHECK(input_metadata.second.format.type == first_input_type, HAILO_INVALID_ARGUMENT, "All input format types should be the same"); + } + + return HAILO_SUCCESS; +} + +std::string Yolov5BboxOnlyOpMetadata::get_op_description() +{ + auto nms_config_info = fmt::format("Classes: {}", + nms_config().number_of_classes); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, static_cast(m_yolov5_config.image_height), static_cast(m_yolov5_config.image_width)); + return config_info; +} + + +Expected> Yolov5BboxOnlyOpMetadata::create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolov5_post_process_config, + const std::string &network_name) +{ + auto op_metadata = std::shared_ptr(new (std::nothrow) Yolov5BboxOnlyOpMetadata(inputs_metadata, outputs_metadata, + nms_post_process_config, yolov5_post_process_config, network_name)); + CHECK_AS_EXPECTED(op_metadata != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + auto status = op_metadata->validate_params(); + CHECK_SUCCESS_AS_EXPECTED(status); + + return std::shared_ptr(std::move(op_metadata)); +} + +hailo_status YOLOv5BboxOnlyPostProcessOp::execute(const std::map &inputs, std::map &outputs) +{ + const auto &inputs_metadata = m_metadata->inputs_metadata(); + const auto &yolo_config = m_metadata->yolov5_config(); + CHECK(inputs.size() == yolo_config.anchors.size(), HAILO_INVALID_ARGUMENT, + "Anchors vector count must be equal to data vector count. 
Anchors size is {}, data size is {}", + yolo_config.anchors.size(), inputs.size()); + + auto dst_ptr = (float32_t*)outputs.begin()->second.data(); + + size_t next_bbox_output_offset = YOLOV5_BBOX_ONLY_BBOXES_INDEX; + + for (const auto &name_to_input : inputs) { + hailo_status status = HAILO_UNINITIALIZED; + auto &name = name_to_input.first; + assert(contains(inputs_metadata, name)); + auto &input_metadata = inputs_metadata.at(name); + assert(contains(yolo_config.anchors, name)); + if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) { + status = add_bboxes(dst_ptr, next_bbox_output_offset, name_to_input.second, + input_metadata.quant_info, input_metadata.shape, input_metadata.padded_shape, yolo_config.anchors.at(name)); + } else if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) { + status = add_bboxes(dst_ptr, next_bbox_output_offset, name_to_input.second, + input_metadata.quant_info, input_metadata.shape, input_metadata.padded_shape, yolo_config.anchors.at(name)); + } else { + CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type); + } + CHECK_SUCCESS(status); + } + return HAILO_SUCCESS; +} + +} // namespace net_flow +} // namespace hailort \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp new file mode 100644 index 00000000..282ff34a --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp @@ -0,0 +1,101 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov5_bbox_only_post_process.hpp + * @brief YOLOV5 bbox only post process + * Output format of yolov5_bbox_only is NHWC - [1, total_proposals, 5 + number_of_classes] + * The bboxes entry in the output of yolov5_bbox_only is a list of bboxes, such that each of them looks like this: + * (y_min, x_min, y_max, x_max, objectness, score_per_class) + * + **/ + +#ifndef _HAILO_YOLOV5_BBOX_ONLY_POST_PROCESS_HPP_ +#define _HAILO_YOLOV5_BBOX_ONLY_POST_PROCESS_HPP_ + +#include "net_flow/ops/yolov5_post_process.hpp" +#include "net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp" + +namespace hailort +{ + +static const uint32_t YOLOV5_BBOX_NUM_OF_VALUES = 5; +namespace net_flow +{ + +class YOLOv5BboxOnlyPostProcessOp : public YOLOv5PostProcessOp +{ +public: + static Expected> create(std::shared_ptr metadata); + + hailo_status execute(const std::map &inputs, std::map &outputs) override; + +private: + + YOLOv5BboxOnlyPostProcessOp(std::shared_ptr metadata) : + YOLOv5PostProcessOp(static_cast>(metadata)) + {} + + static const uint32_t YOLOV5_BBOX_ONLY_BBOXES_INDEX = 0; + + template + void add_classes_scores(hailo_quant_info_t &quant_info, DstType* dst_data, size_t &next_bbox_output_offset, + SrcType* src_data, uint32_t entry_idx, uint32_t class_start_idx, uint32_t padded_width) + { + const auto &nms_config = m_metadata->nms_config(); + + for (uint32_t class_index = 0; class_index < nms_config.number_of_classes; class_index++) { + auto class_entry_idx = entry_idx + ((class_start_idx + class_index) * padded_width); + auto class_confidence = dequantize_and_sigmoid( + src_data[class_entry_idx], quant_info); + dst_data[next_bbox_output_offset++] = class_confidence; + } + } + + template + hailo_status add_bboxes(DstType *dst_ptr, size_t &next_bbox_output_offset, + const MemoryView &input_buffer, 
hailo_quant_info_t quant_info, hailo_3d_image_shape_t shape, + hailo_3d_image_shape_t padded_shape, const std::vector &layer_anchors) + { + const uint32_t X_OFFSET = X_INDEX * padded_shape.width; + const uint32_t Y_OFFSET = Y_INDEX * padded_shape.width; + const uint32_t W_OFFSET = W_INDEX * padded_shape.width; + const uint32_t H_OFFSET = H_INDEX * padded_shape.width; + const uint32_t OBJECTNESS_OFFSET = OBJECTNESS_INDEX * padded_shape.width; + + auto num_of_anchors = get_num_of_anchors(layer_anchors); + + uint32_t entry_size = get_entry_size(); + auto number_of_entries = padded_shape.height * padded_shape.width * num_of_anchors; + + auto buffer_size = number_of_entries * entry_size * sizeof(SrcType); + CHECK(buffer_size == input_buffer.size(), HAILO_INVALID_ARGUMENT, + "Failed to extract proposals, buffer_size should be {}, but is {}", buffer_size, input_buffer.size()); + + auto input_row_size = padded_shape.width * padded_shape.features; + SrcType *input_data = (SrcType*)input_buffer.data(); + for (uint32_t row = 0; row < shape.height; row++) { + for (uint32_t col = 0; col < shape.width; col++) { + for (uint32_t anchor = 0; anchor < num_of_anchors; anchor++) { + auto entry_idx = (input_row_size * row) + col + ((anchor * entry_size) * padded_shape.width); + auto objectness = dequantize_and_sigmoid(input_data[entry_idx + OBJECTNESS_OFFSET], quant_info); + auto bbox = decode_bbox(input_data, entry_idx, X_OFFSET, Y_OFFSET, W_OFFSET, H_OFFSET, + quant_info, anchor, layer_anchors, col, row, shape); + memcpy(&dst_ptr[next_bbox_output_offset], &bbox, sizeof(hailo_bbox_float32_t) - sizeof(DstType)); // copy y_min, x_min, y_max, x_max + next_bbox_output_offset += (sizeof(hailo_bbox_float32_t) / sizeof(float32_t)) - 1; + dst_ptr[next_bbox_output_offset++] = objectness; + + add_classes_scores(quant_info, dst_ptr, next_bbox_output_offset, input_data, entry_idx, + CLASSES_START_INDEX, padded_shape.width); + } + } + } + return HAILO_SUCCESS; + } +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_YOLOV5_BBOX_ONLY_POST_PROCESS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp index 0e08b419..70151993 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp @@ -43,11 +43,16 @@ hailo_status Yolov5OpMetadata::validate_format_info() return NmsOpMetadata::validate_format_info(); } +Expected Yolov5OpMetadata::get_output_vstream_info() +{ + return NmsOpMetadata::get_output_vstream_info(); +} + std::string Yolov5OpMetadata::get_op_description() { auto nms_config_info = get_nms_config_description(); - auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}", - OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, m_yolov5_config.image_height, m_yolov5_config.image_width); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, static_cast(m_yolov5_config.image_height), static_cast(m_yolov5_config.image_width)); return config_info; } @@ -72,7 +77,7 @@ hailo_status YOLOv5PostProcessOp::execute(const std::mapnms_config().number_of_classes); } +size_t YOLOv5PostProcessOp::get_num_of_anchors(const std::vector &layer_anchors) +{ + // Each layer anchors vector is structured as {w,h} pairs. 
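add_bboxes() above emits one fixed-size row per (cell, anchor): four box coordinates, objectness, then number_of_classes scores, which is exactly the NHWC [1, total_proposals, 5 + number_of_classes] layout documented in the new header. A small reader sketch for consuming that output buffer (names are illustrative, not part of the API):

    #include <cstddef>

    // One proposal row: y_min, x_min, y_max, x_max, objectness, per-class scores.
    struct ProposalViewSketch
    {
        const float *row;
        float objectness() const { return row[4]; }
        float class_score(size_t class_idx) const { return row[5 + class_idx]; }
    };

    static ProposalViewSketch get_proposal(const float *output, size_t proposal_idx,
                                           size_t number_of_classes)
    {
        const size_t entry_len = 5 + number_of_classes;
        return ProposalViewSketch{output + proposal_idx * entry_len};
    }

total_proposals here is the sum over input layers of height * width * num_of_anchors, matching the loop structure of add_bboxes().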
+ // For example, if we have a vector of size 6 (default YOLOv5 vector) then we have 3 anchors for this layer. + assert(layer_anchors.size() % 2 == 0); + size_t num_of_anchors = (layer_anchors.size() / 2); + return num_of_anchors; +} + } // namespace net_flow } // namespace hailort diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp index 903a1da1..15c9b6b7 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp @@ -14,7 +14,7 @@ #define _HAILO_YOLO_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/yolov5_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" namespace hailort { @@ -29,6 +29,7 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp static Expected> create(std::shared_ptr metadata); hailo_status execute(const std::map &inputs, std::map &outputs) override; + static size_t get_num_of_anchors(const std::vector &layer_anchors); protected: hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, @@ -48,7 +49,18 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp static const uint32_t OBJECTNESS_INDEX = 4; static const uint32_t CLASSES_START_INDEX = 5; - + template + hailo_bbox_float32_t decode_bbox(SrcType* data, uint32_t entry_idx, const uint32_t X_OFFSET, const uint32_t Y_OFFSET, + const uint32_t W_OFFSET, const uint32_t H_OFFSET, hailo_quant_info_t quant_info, uint32_t anchor, + const std::vector &layer_anchors, uint32_t col, uint32_t row, hailo_3d_image_shape_t shape) + { + auto tx = dequantize_and_sigmoid(data[entry_idx + X_OFFSET], quant_info); + auto ty = dequantize_and_sigmoid(data[entry_idx + Y_OFFSET], quant_info); + auto tw = dequantize_and_sigmoid(data[entry_idx + W_OFFSET], quant_info); + auto th = dequantize_and_sigmoid(data[entry_idx + H_OFFSET], quant_info); + return decode(tx, ty, tw, th, layer_anchors[anchor * 2], layer_anchors[anchor * 2 + 1], col, row, + shape.width, shape.height); + } template void check_threshold_and_add_detection(hailo_bbox_float32_t bbox, hailo_quant_info_t &quant_info, @@ -126,10 +138,7 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp const auto &nms_config = m_metadata->nms_config(); - // Each layer anchors vector is structured as {w,h} pairs. - // For example, if we have a vector of size 6 (default YOLOv5 vector) then we have 3 anchors for this layer. 
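The new decode_bbox() helper funnels the four dequantized-and-sigmoided regression values into decode(), whose body lies outside this hunk. Assuming it follows the stock YOLOv5 (v4 and later) formulation, the arithmetic is roughly the sketch below; the anchor handling and normalization are assumptions, not confirmed by this diff:

    #include <cstdint>

    struct BboxSketch { float y_min, x_min, y_max, x_max; };

    // tx..th are already post-sigmoid; anchors are {w,h} pairs; result is normalized.
    static BboxSketch yolov5_decode_sketch(float tx, float ty, float tw, float th,
                                           float anchor_w, float anchor_h,
                                           uint32_t col, uint32_t row,
                                           uint32_t grid_w, uint32_t grid_h,
                                           float image_w, float image_h)
    {
        const float x_center = (tx * 2.0f - 0.5f + (float)col) / (float)grid_w;
        const float y_center = (ty * 2.0f - 0.5f + (float)row) / (float)grid_h;
        const float w = (tw * 2.0f) * (tw * 2.0f) * anchor_w / image_w;
        const float h = (th * 2.0f) * (th * 2.0f) * anchor_h / image_h;
        return BboxSketch{y_center - h / 2.0f, x_center - w / 2.0f,
                          y_center + h / 2.0f, x_center + w / 2.0f};
    }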
- assert(layer_anchors.size() % 2 == 0); - const size_t num_of_anchors = (layer_anchors.size() / 2); + auto num_of_anchors = get_num_of_anchors(layer_anchors); uint32_t entry_size = get_entry_size(); auto number_of_entries = padded_shape.height * padded_shape.width * num_of_anchors; @@ -149,12 +158,8 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp continue; } - auto tx = dequantize_and_sigmoid(data[entry_idx + X_OFFSET], quant_info); - auto ty = dequantize_and_sigmoid(data[entry_idx + Y_OFFSET], quant_info); - auto tw = dequantize_and_sigmoid(data[entry_idx + W_OFFSET], quant_info); - auto th = dequantize_and_sigmoid(data[entry_idx + H_OFFSET], quant_info); - auto bbox = decode(tx, ty, tw, th, layer_anchors[anchor * 2], layer_anchors[anchor * 2 + 1], col, row, - shape.width, shape.height); + auto bbox = decode_bbox(data, entry_idx, X_OFFSET, Y_OFFSET, W_OFFSET, H_OFFSET, + quant_info, anchor, layer_anchors, col, row, shape); decode_classes_scores(bbox, quant_info, data, entry_idx, CLASSES_START_INDEX, objectness, padded_shape.width); @@ -164,12 +169,12 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp return HAILO_SUCCESS; } -private: - std::shared_ptr m_metadata; + + std::shared_ptr m_metadata; }; -} // namespace net_flow -} // namespace hailort +} /* namespace net_flow */ +} /* namespace hailort */ -#endif // _HAILO_YOLO_POST_PROCESS_HPP_ +#endif /* _HAILO_YOLO_POST_PROCESS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp index 539a9856..e89082b7 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp @@ -16,9 +16,11 @@ #else #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-parameter" #endif #define STB_IMAGE_RESIZE_IMPLEMENTATION #include "stb_image_resize.h" +#include #if defined(_MSC_VER) #pragma warning(pop) #else @@ -30,6 +32,9 @@ namespace hailort namespace net_flow { +constexpr uint32_t VECTOR_DIM = 1; +using Eigen_Vector32f = Eigen::Matrix; + Expected> Yolov5SegOpMetadata::create(const std::unordered_map &inputs_metadata, const std::unordered_map &outputs_metadata, const NmsPostProcessConfig &nms_post_process_config, const YoloPostProcessConfig &yolo_config, const YoloV5SegPostProcessConfig &yolo_seg_config, @@ -45,6 +50,13 @@ Expected> Yolov5SegOpMetadata::create(const std::uno return std::shared_ptr(std::move(op_metadata)); } +hailo_status Yolov5SegOpMetadata::validate_params() +{ + CHECK(!nms_config().bbox_only, HAILO_INVALID_ARGUMENT, "YOLOv5SegPostProcessOp: bbox_only is not supported for YOLOv5Seg model"); + + return Yolov5OpMetadata::validate_params(); +} + hailo_status Yolov5SegOpMetadata::validate_format_info() { for (const auto& output_metadata : m_outputs_metadata) { @@ -90,7 +102,7 @@ Expected Yolov5SegOpMetadata::get_output_vstream_info() auto vstream_info = NmsOpMetadata::get_output_vstream_info(); CHECK_EXPECTED(vstream_info); - vstream_info->nms_shape.max_mask_size = static_cast(yolov5_config().image_height * yolov5_config().image_width); + vstream_info->nms_shape.max_accumulated_mask_size = m_yolo_seg_config.max_accumulated_mask_size; return vstream_info.release(); } @@ -105,8 +117,6 @@ Expected> Yolov5SegPostProcess::create(std::shared_ptr> Yolov5SegPostProcess::create(std::shared_ptr(new (std::nothrow) Yolov5SegPostProcess(std::move(metadata), - 
mask_mult_result_buffer.release(), resized_buffer.release(), transformed_proto_buffer.release(), dequantized_proto_buffer.release())); + mask_mult_result_buffer.release(), resized_buffer.release(), transformed_proto_buffer.release())); CHECK_NOT_NULL_AS_EXPECTED(op, HAILO_OUT_OF_HOST_MEMORY); return std::shared_ptr(std::move(op)); } Yolov5SegPostProcess::Yolov5SegPostProcess(std::shared_ptr metadata, - Buffer &&mask_mult_result_buffer, Buffer &&resized_mask, Buffer &&transformed_proto_buffer, Buffer &&dequantized_proto_buffer) + Buffer &&mask_mult_result_buffer, Buffer &&resized_mask, Buffer &&transformed_proto_buffer) : YOLOv5PostProcessOp(static_cast>(metadata)), m_metadata(metadata), m_mask_mult_result_buffer(std::move(mask_mult_result_buffer)), m_resized_mask_to_image_dim(std::move(resized_mask)), - m_transformed_proto_buffer(std::move(transformed_proto_buffer)), - m_dequantized_proto_buffer(std::move(dequantized_proto_buffer)) + m_transformed_proto_buffer(std::move(transformed_proto_buffer)) {} hailo_status Yolov5SegPostProcess::execute(const std::map &inputs, std::map &outputs) @@ -138,7 +147,7 @@ hailo_status Yolov5SegPostProcess::execute(const std::map> proto_layer( + (float32_t*)m_transformed_proto_buffer.data(), MASK_COEFFICIENT_SIZE, proto_mat_cols); + + Eigen_Vector32f coefficients(detection.m_coefficients.data()); + auto mult_result = (coefficients.transpose() * proto_layer); + + Eigen::Map> result( + (float32_t*)m_mask_mult_result_buffer.data(), VECTOR_DIM, proto_mat_cols); + result = 1.0f / (1.0f + (-1*mult_result).array().exp()); } hailo_status Yolov5SegPostProcess::crop_and_copy_mask(const DetectionBbox &detection, MemoryView &buffer, uint32_t buffer_offset) @@ -210,10 +219,10 @@ hailo_status Yolov5SegPostProcess::crop_and_copy_mask(const DetectionBbox &detec static_cast(yolov5_config.image_height), 0, 1, STBIR_ALPHA_CHANNEL_NONE, 0, STBIR_EDGE_CLAMP, STBIR_FILTER_TRIANGLE, STBIR_COLORSPACE_LINEAR, NULL); - auto x_min = static_cast(std::ceil(detection.m_bbox.x_min * yolov5_config.image_width)); - auto x_max = static_cast(std::ceil(detection.m_bbox.x_max * yolov5_config.image_width)); - auto y_min = static_cast(std::ceil(detection.m_bbox.y_min * yolov5_config.image_height)); - auto y_max = static_cast(std::ceil(detection.m_bbox.y_max * yolov5_config.image_height)); + auto x_min = static_cast(MAX(std::ceil(detection.m_bbox.x_min * yolov5_config.image_width), 0.0f)); + auto x_max = static_cast(MIN(std::ceil(detection.m_bbox.x_max * yolov5_config.image_width), yolov5_config.image_width)); + auto y_min = static_cast(MAX(std::ceil(detection.m_bbox.y_min * yolov5_config.image_height), 0.0f)); + auto y_max = static_cast(MIN(std::ceil(detection.m_bbox.y_max * yolov5_config.image_height), yolov5_config.image_height)); auto box_width = detection.get_bbox_width(yolov5_config.image_width); uint8_t *dst_mask = (uint8_t*)(buffer.data() + buffer_offset); @@ -245,22 +254,26 @@ hailo_status Yolov5SegPostProcess::calc_and_copy_mask(const DetectionBbox &detec Expected Yolov5SegPostProcess::copy_detection_to_result_buffer(MemoryView &buffer, DetectionBbox &detection, uint32_t buffer_offset) { - uint32_t copied_bytes_amount = 0; + uint32_t detection_size = sizeof(detection.m_bbox_with_mask); + uint32_t mask_size = static_cast(detection.m_bbox_with_mask.mask_size); + CHECK((buffer_offset + detection_size + mask_size) < buffer.size(), HAILO_INSUFFICIENT_BUFFER, + "The given buffer is too small to contain all detections." 
\ + " The output buffer will contain the highest scored detections that could be filled." \ + " One can use `set_nms_max_accumulated_mask_size` to change the output buffer size."); // Copy bbox - uint32_t size_to_copy = sizeof(detection.m_bbox_with_mask); - assert((buffer_offset + size_to_copy) <= buffer.size()); - detection.m_bbox_with_mask.mask = (buffer.data() + buffer_offset + size_to_copy); + uint32_t copied_bytes_amount = 0; + detection.m_bbox_with_mask.mask = (buffer.data() + buffer_offset + detection_size); *(hailo_detection_with_byte_mask_t*)(buffer.data() + buffer_offset) = *(hailo_detection_with_byte_mask_t*)&(detection.m_bbox_with_mask); - buffer_offset += size_to_copy; - copied_bytes_amount += size_to_copy; + buffer_offset += detection_size; + copied_bytes_amount += detection_size; // Calc and copy mask auto status = calc_and_copy_mask(detection, buffer, buffer_offset); CHECK_SUCCESS_AS_EXPECTED(status); - copied_bytes_amount += static_cast(detection.m_bbox_with_mask.mask_size); + copied_bytes_amount += mask_size; m_classes_detections_count[detection.m_class_id]--; return copied_bytes_amount; @@ -268,6 +281,7 @@ Expected Yolov5SegPostProcess::copy_detection_to_result_buffer(MemoryV hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &buffer) { + auto status = HAILO_SUCCESS; const auto &nms_config = m_metadata->nms_config(); uint32_t ignored_detections_count = 0; uint16_t detections_count = 0; @@ -292,6 +306,10 @@ hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &bu } auto copied_bytes_amount = copy_detection_to_result_buffer(buffer, detection, buffer_offset); + if (HAILO_INSUFFICIENT_BUFFER == copied_bytes_amount.status()) { + status = copied_bytes_amount.status(); + break; + } CHECK_EXPECTED_AS_STATUS(copied_bytes_amount); buffer_offset += copied_bytes_amount.release(); detections_count++; @@ -305,7 +323,7 @@ hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &bu ignored_detections_count, nms_config.max_proposals_per_class); } - return HAILO_SUCCESS; + return status; } } /* namespace net_flow */ diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp index ae3a7b2a..94816fbd 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp @@ -13,7 +13,7 @@ #include "hailo/hailort.h" #include "net_flow/ops/yolov5_post_process.hpp" #include "transform/transform_internal.hpp" -#include "net_flow/ops/yolov5_seg_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp" namespace hailort { @@ -45,23 +45,36 @@ class Yolov5SegPostProcess : public YOLOv5PostProcessOp return m_metadata->inputs_metadata().at(m_metadata->yolov5seg_config().proto_layer_name).shape; }; + template + static void transform__d2h_NHCW_to_NCHW_with_dequantize(SrcType *src_ptr, hailo_3d_image_shape_t shape, + DstType *dst_ptr, hailo_quant_info_t quant_info) + { + assert(nullptr != src_ptr); + assert(nullptr != dst_ptr); + + uint32_t width_size = shape.width; + for (uint32_t r = 0; r < shape.height; r++) { + for (uint32_t c = 0; c < shape.features; c++) { + SrcType *src = src_ptr + shape.features * shape.width * r + shape.width * c; + DstType *dst = dst_ptr + shape.width * shape.height * c + shape.width * r; + Quantization::dequantize_output_buffer(src, dst, width_size, quant_info); + } + } + } + // Transform proto layer - To multiply between the box 
mask coefficients (of shape (1, 32)), in the proto layer, // we change the proto layer shape to be (features=32, height * width) template void transform_proto_layer(SrcType *src_buffer, const hailo_quant_info_t &quant_info) { hailo_3d_image_shape_t shape = get_proto_layer_shape(); - - // TODO: HRT-11734 Improve performance - Make both funcs in one run? - Quantization::dequantize_output_buffer(src_buffer, (float32_t*)m_dequantized_proto_buffer.data(), - HailoRTCommon::get_shape_size(shape), quant_info); - TransformContextUtils::transform__d2h_NHCW_to_NCHW((float32_t*)m_dequantized_proto_buffer.data(), &shape, - (float32_t*)m_transformed_proto_buffer.data(), &shape); + transform__d2h_NHCW_to_NCHW_with_dequantize(src_buffer, shape, + (float32_t*)m_transformed_proto_buffer.data(), quant_info); } private: Yolov5SegPostProcess(std::shared_ptr metadata, Buffer &&mask_mult_result_buffer, - Buffer &&resized_mask, Buffer &&transformed_proto_buffer, Buffer &&dequantized_proto_buffer); + Buffer &&resized_mask, Buffer &&transformed_proto_buffer); hailo_status fill_nms_with_byte_mask_format(MemoryView &buffer); void mult_mask_vector_and_proto_matrix(const DetectionBbox &detection); @@ -76,9 +89,7 @@ class Yolov5SegPostProcess : public YOLOv5PostProcessOp Buffer m_mask_mult_result_buffer; Buffer m_resized_mask_to_image_dim; - // TODO: HRT-11734 - Try use one buffer for both actions Buffer m_transformed_proto_buffer; - Buffer m_dequantized_proto_buffer; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp index 788146af..b700ed58 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp @@ -34,8 +34,8 @@ Expected> Yolov8OpMetadata::create(const std::unorde std::string Yolov8OpMetadata::get_op_description() { auto nms_config_info = get_nms_config_description(); - auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}", - OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, m_yolov8_config.image_height, m_yolov8_config.image_width); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, static_cast(m_yolov8_config.image_height), static_cast(m_yolov8_config.image_width)); return config_info; } @@ -43,6 +43,8 @@ hailo_status Yolov8OpMetadata::validate_params() { CHECK_SUCCESS(NmsOpMetadata::validate_params()); + CHECK(!nms_config().bbox_only, HAILO_INVALID_ARGUMENT, "YOLOV8PostProcessOp: bbox_only is not supported for YOLOV8 model"); + // We go over the inputs metadata and check that it includes all of the regs and clss for (const auto &layer_names : m_yolov8_config.reg_to_cls_inputs) { CHECK(contains(m_inputs_metadata, layer_names.reg), HAILO_INVALID_ARGUMENT, @@ -98,7 +100,7 @@ hailo_status YOLOV8PostProcessOp::execute(const std::mapsecond); } -template -hailo_bbox_float32_t YOLOV8PostProcessOp::get_bbox(uint32_t row, uint32_t col, uint32_t stride, const hailo_3d_image_shape_t ®_padded_shape, - const hailo_quant_info_t ®_quant_info, SrcType *reg_data, std::vector> &d_matrix, DstType class_confidence) -{ - auto reg_row_size = reg_padded_shape.width * reg_padded_shape.features; - auto reg_feature_size = reg_padded_shape.width; - auto reg_idx = (reg_row_size * row) + col; - - // For each HxW - reshape from features to 4 x (features/4) + dequantize - // For 
example - reshape from 64 to 4X16 - 4 vectors of 16 values - for (uint32_t feature = 0; feature < reg_padded_shape.features; feature++) { - auto &tmp_vector = d_matrix.at(feature / (reg_padded_shape.features / NUM_OF_D_VALUES)); - tmp_vector[feature % (reg_padded_shape.features / NUM_OF_D_VALUES)] = Quantization::dequantize_output(reg_data[reg_idx + feature*reg_feature_size], reg_quant_info); - } - - // Performing softmax operation on each of the vectors - for (uint32_t vector_index = 0; vector_index < d_matrix.size(); vector_index++) { - auto &tmp_vector = d_matrix.at(vector_index); - SoftmaxPostProcessOp::softmax(tmp_vector.data(), tmp_vector.data(), tmp_vector.size()); - } - - // Performing dot product on each vector - // (A, B, C, ..., F, G) -> 0*A + 1*B + 2*C + ... + 14*F + 15*G - for (uint32_t vector_index = 0; vector_index < NUM_OF_D_VALUES; vector_index++) { - m_d_values_matrix[vector_index] = dot_product(d_matrix.at(vector_index)); - } - - // The decode function extract x_min, y_min, x_max, y_max from d1, d2, d3, d4 - const auto &d1 = m_d_values_matrix.at(0); - const auto &d2 = m_d_values_matrix.at(1); - const auto &d3 = m_d_values_matrix.at(2); - const auto &d4 = m_d_values_matrix.at(3); - auto bbox = decode(d1, d2, d3, d4, col, row, stride); - bbox.score = class_confidence; - return bbox; -} - hailo_bbox_float32_t YOLOV8PostProcessOp::decode(float32_t d1, float32_t d2, float32_t d3, float32_t d4, uint32_t col, uint32_t row, uint32_t stride) const { @@ -190,4 +155,4 @@ float32_t YOLOV8PostProcessOp::dot_product(std::vector &values) } } -} \ No newline at end of file +} diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp index 25d01965..d0433f92 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp @@ -12,61 +12,13 @@ #define _HAILO_YOLOV8_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops/softmax_post_process.hpp" +#include "net_flow/ops_metadata/yolov8_op_metadata.hpp" namespace hailort { namespace net_flow { -struct Yolov8MatchingLayersNames -{ - // Regression layer - std::string reg; - - // Classifications layer - std::string cls; - - uint32_t stride; -}; - -struct Yolov8PostProcessConfig -{ - // The image height. - float32_t image_height = 0; - - // The image width. - float32_t image_width = 0; - - // A vector off two strings that represents the relations between the outputs names. 
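The Yolov8MatchingLayersNames/Yolov8PostProcessConfig pair removed here is re-added under ops_metadata/ later in this patch; each entry ties a regression output to its classification output and stride. Purely for orientation, it might be populated as follows (all layer names are hypothetical):

    Yolov8PostProcessConfig cfg;
    cfg.image_height = 640.0f;
    cfg.image_width = 640.0f;
    // One entry per detection head: {regression output, classification output, stride}
    cfg.reg_to_cls_inputs.push_back({"net/reg_stride8",  "net/cls_stride8",  8});
    cfg.reg_to_cls_inputs.push_back({"net/reg_stride16", "net/cls_stride16", 16});
    cfg.reg_to_cls_inputs.push_back({"net/reg_stride32", "net/cls_stride32", 32});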
- std::vector reg_to_cls_inputs; -}; - -class Yolov8OpMetadata : public NmsOpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const Yolov8PostProcessConfig &yolov8_post_process_config, - const std::string &network_name); - hailo_status validate_format_info() override; - std::string get_op_description() override; - Yolov8PostProcessConfig &yolov8_config() { return m_yolov8_config;}; - -private: - Yolov8PostProcessConfig m_yolov8_config; - Yolov8OpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const Yolov8PostProcessConfig &yolov8_post_process_config, - const std::string &network_name) - : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOV8-Post-Process", network_name, OperationType::YOLOV8) - , m_yolov8_config(yolov8_post_process_config) - {} - - hailo_status validate_params() override; -}; - class YOLOV8PostProcessOp : public NmsPostProcessOp { public: @@ -84,8 +36,44 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp { for (const auto &input_metadata : m_metadata->inputs_metadata()) { m_d_matrix[input_metadata.first] = std::vector>(NUM_OF_D_VALUES, - std::vector(input_metadata.second.padded_shape.features / NUM_OF_D_VALUES)); + std::vector(input_metadata.second.shape.features / NUM_OF_D_VALUES)); + } + } + + template + hailo_bbox_float32_t get_bbox(uint32_t row, uint32_t col, uint32_t stride, const hailo_3d_image_shape_t ®_padded_shape, + const hailo_3d_image_shape_t ®_shape, const hailo_quant_info_t ®_quant_info, SrcType *reg_data, + std::vector> &d_matrix, DstType class_confidence = 0) + { + auto reg_row_size = reg_padded_shape.width * reg_padded_shape.features; // should be the padded values - we use it to get to the relevant row + auto reg_feature_size = reg_padded_shape.width; // Also should be the padded value - we use it to get to the relevant feature + auto reg_idx = (reg_row_size * row) + col; + + // For each HxW - reshape from features to 4 x (features/4) + dequantize + // For example - reshape from 64 to 4X16 - 4 vectors of 16 values + for (uint32_t feature = 0; feature < reg_shape.features; feature++) { + auto &tmp_vector = d_matrix.at(feature / (reg_shape.features / NUM_OF_D_VALUES)); + tmp_vector[feature % (reg_shape.features / NUM_OF_D_VALUES)] = Quantization::dequantize_output(reg_data[reg_idx + feature*reg_feature_size], reg_quant_info); + } + + // Performing softmax operation on each of the vectors + for (uint32_t vector_index = 0; vector_index < d_matrix.size(); vector_index++) { + auto &tmp_vector = d_matrix.at(vector_index); + SoftmaxPostProcessOp::softmax(tmp_vector.data(), tmp_vector.data(), tmp_vector.size()); + } + // Performing dot product on each vector + // (A, B, C, ..., F, G) -> 0*A + 1*B + 2*C + ... 
+ 14*F + 15*G + for (uint32_t vector_index = 0; vector_index < NUM_OF_D_VALUES; vector_index++) { + m_d_values_matrix[vector_index] = dot_product(d_matrix.at(vector_index)); } + // The decode function extract x_min, y_min, x_max, y_max from d1, d2, d3, d4 + const auto &d1 = m_d_values_matrix.at(0); + const auto &d2 = m_d_values_matrix.at(1); + const auto &d3 = m_d_values_matrix.at(2); + const auto &d4 = m_d_values_matrix.at(3); + auto bbox = decode(d1, d2, d3, d4, col, row, stride); + bbox.score = class_confidence; + return bbox; } static const uint32_t CLASSES_START_INDEX = 0; @@ -101,6 +89,8 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp assert(contains(inputs_metadata, layers_names.reg)); assert(contains(inputs_metadata, layers_names.cls)); + const auto ®_shape = inputs_metadata.at(layers_names.reg).shape; + const auto &cls_shape = inputs_metadata.at(layers_names.cls).shape; const auto ®_padded_shape = inputs_metadata.at(layers_names.reg).padded_shape; const auto &cls_padded_shape = inputs_metadata.at(layers_names.cls).padded_shape; const auto ®_quant_info = inputs_metadata.at(layers_names.reg).quant_info; @@ -119,14 +109,14 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp CHECK(buffer_size == cls_buffer.size(), HAILO_INVALID_ARGUMENT, "Failed to extract_detections, cls {} buffer_size should be {}, but is {}", layers_names.cls, buffer_size, cls_buffer.size()); - // Format is NHCW -> each row size is C size * W size + // Format is NHCW -> each row size is (padded C size) * (padded W size) auto cls_row_size = cls_padded_shape.features * cls_padded_shape.width; SrcType *reg_data = (SrcType*)reg_buffer.data(); SrcType *cls_data = (SrcType*)cls_buffer.data(); - for (uint32_t row = 0; row < cls_padded_shape.height; row++) { - for (uint32_t col = 0; col < cls_padded_shape.width; col++) { + for (uint32_t row = 0; row < cls_shape.height; row++) { + for (uint32_t col = 0; col < cls_shape.width; col++) { auto cls_idx = (cls_row_size * row) + col; if (nms_config.cross_classes) { @@ -137,7 +127,7 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp // If passes threshold - get the relevant bbox and add this detection assert(contains(m_d_matrix, layers_names.reg)); auto &d_matrix = m_d_matrix.at(layers_names.reg); - auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_quant_info, + auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_shape, reg_quant_info, (SrcType*)reg_data, d_matrix, max_id_score_pair.second); m_detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); m_classes_detections_count[max_id_score_pair.first]++; @@ -153,7 +143,7 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp // If passes threshold - get the relevant bbox and add this detection assert(contains(m_d_matrix, layers_names.reg)); auto &d_matrix = m_d_matrix.at(layers_names.reg); - auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_quant_info, + auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_shape, reg_quant_info, (SrcType*)reg_data, d_matrix, class_confidence); m_detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); m_classes_detections_count[curr_class_idx]++; diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp index 8f67829b..98812cd7 100644 --- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp @@ -32,8 +32,8 @@ Expected> YoloxOpMetadata::create(const std::unorder std::string 
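The relocated get_bbox() above is the YOLOv8 DFL-style head: each of the four box distances is the expectation of a softmax distribution over features/4 bins (16 with the default 64 regression features), i.e. d = sum_b b * softmax(logits)_b. A compact sketch of that reduction, plus the center/stride recovery; decode()'s body is outside this hunk, so the second function is an assumption based on the upstream YOLOv8 formulation:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>

    // Expectation over softmax bins, computed stably: sum_i i * softmax(logits)_i.
    static float dfl_expectation_sketch(const float *logits, size_t n)
    {
        const float max_val = *std::max_element(logits, logits + n);
        float sum = 0.0f, acc = 0.0f;
        for (size_t i = 0; i < n; i++) {
            const float e = std::exp(logits[i] - max_val);
            sum += e;
            acc += (float)i * e;
        }
        return acc / sum;
    }

    // Assumed decode: the four distances are in stride units around the cell center.
    static void yolov8_box_sketch(float d_left, float d_top, float d_right, float d_bottom,
                                  uint32_t col, uint32_t row, uint32_t stride,
                                  float image_w, float image_h,
                                  float out_yxyx[4]) // y_min, x_min, y_max, x_max
    {
        const float cx = ((float)col + 0.5f) * (float)stride;
        const float cy = ((float)row + 0.5f) * (float)stride;
        out_yxyx[0] = (cy - d_top * (float)stride) / image_h;
        out_yxyx[1] = (cx - d_left * (float)stride) / image_w;
        out_yxyx[2] = (cy + d_bottom * (float)stride) / image_h;
        out_yxyx[3] = (cx + d_right * (float)stride) / image_w;
    }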
YoloxOpMetadata::get_op_description() { auto nms_config_info = get_nms_config_description(); - auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}", - OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, m_yolox_config.image_height, m_yolox_config.image_width); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, static_cast(m_yolox_config.image_height), static_cast(m_yolox_config.image_width)); return config_info; } @@ -41,6 +41,8 @@ hailo_status YoloxOpMetadata::validate_params() { CHECK_SUCCESS(NmsOpMetadata::validate_params()); + CHECK(!nms_config().bbox_only, HAILO_INVALID_ARGUMENT, "YOLOXPostProcessOp: bbox_only is not supported for YOLOX model"); + // Validate regs, clss and objs matching layers have same shape for (const auto &layer_names : m_yolox_config.input_names) { CHECK(contains(m_inputs_metadata, layer_names.reg), HAILO_INVALID_ARGUMENT, @@ -106,7 +108,7 @@ hailo_status YOLOXPostProcessOp::execute(const std::map clear_before_frame(); for (const auto &layers_names_triplet : yolox_config.input_names) { - hailo_status status; + hailo_status status = HAILO_UNINITIALIZED; assert(contains(inputs, layers_names_triplet.cls)); assert(contains(inputs, layers_names_triplet.obj)); assert(contains(inputs, layers_names_triplet.reg)); diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp index 3850f157..64f74277 100644 --- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp @@ -12,63 +12,13 @@ #define _HAILO_YOLOX_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/yolox_op_metadata.hpp" namespace hailort { namespace net_flow { -struct YoloxMatchingLayersNames -{ - // Regression layer - std::string reg; - - // Objectness layer - std::string obj; - - // Classifications layer - std::string cls; -}; - -struct YoloxPostProcessConfig -{ - // The image height. - float32_t image_height = 0; - - // The image width. - float32_t image_width = 0; - - // A vector off three strings that represents the relations between the outputs names. 
- std::vector input_names; -}; - -class YoloxOpMetadata : public NmsOpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloxPostProcessConfig &yolox_post_process_config, - const std::string &network_name); - hailo_status validate_format_info() override; - std::string get_op_description() override; - YoloxPostProcessConfig &yolox_config() { return m_yolox_config;}; - -private: - YoloxPostProcessConfig m_yolox_config; - YoloxOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloxPostProcessConfig &yolox_post_process_config, - const std::string &network_name) - : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOX-Post-Process", network_name, OperationType::YOLOX) - , m_yolox_config(yolox_post_process_config) - {} - - hailo_status validate_params() override; -}; - class YOLOXPostProcessOp : public NmsPostProcessOp { public: diff --git a/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp new file mode 100644 index 00000000..185d04db --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp @@ -0,0 +1,50 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file argmax_op_metadata.hpp + * @brief Argmax op metadata + * + **/ + +#ifndef _HAILO_ARGMAX_OP_METADATA_HPP_ +#define _HAILO_ARGMAX_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +constexpr std::size_t ARGMAX_OUTPUT_FEATURES_SIZE {1}; +constexpr std::size_t ARGMAX_NUMBER_OF_SRCS {1}; +constexpr std::size_t ARGMAX_NUMBER_OF_DSTS {1}; + +class ArgmaxOpMetadata : public OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const std::string &network_name); + std::string get_op_description() override; + hailo_status validate_format_info() override; + static hailo_format_t expand_output_format_autos(const hailo_format_t &output_format, const hailo_format_t &input_format); + + virtual Expected get_output_vstream_info() override; + +private: + ArgmaxOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const std::string &network_name) + : OpMetadata(inputs_metadata, outputs_metadata, "Argmax-Post-Process", network_name, OperationType::ARGMAX) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_ARGMAX_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp new file mode 100644 index 00000000..60971265 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp @@ -0,0 +1,92 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
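The NmsPostProcessConfig defined in the new header that begins here gathers every knob the NMS-family ops read. Seeded with typical values it might look like the following; the numbers are illustrative only and entirely model and application dependent:

    NmsPostProcessConfig nms_cfg;
    nms_cfg.nms_score_th = 0.25;           // keep boxes whose objectness * class score exceeds this
    nms_cfg.nms_iou_th = 0.45;             // suppress overlapping boxes above this IoU
    nms_cfg.max_proposals_per_class = 100;
    nms_cfg.number_of_classes = 80;        // e.g. a COCO-trained model
    nms_cfg.cross_classes = false;         // per-class NMS: a box may keep several labels
    nms_cfg.bbox_only = false;             // run full NMS rather than decode-only output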
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file nms_op_metadata.hpp + * @brief NMS op metadata + * + **/ + +#ifndef _HAILO_NET_FLOW_NMS_OP_METADATA_HPP_ +#define _HAILO_NET_FLOW_NMS_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct NmsPostProcessConfig +{ + // User given confidence threshold for a bbox. A bbox will be considered a detection if the + // (objectness * class_score) is higher than the confidence_threshold. + double nms_score_th = 0; + + // User given IoU threshold (intersection over union). This threshold is for performing + // Non-maximum suppression (Removing overlapping boxes). + double nms_iou_th = 0; + + // Maximum number of bboxes per NMS class. + uint32_t max_proposals_per_class = 0; + + // The model's number of classes. (This depends on the dataset the model was trained on). + uint32_t number_of_classes = 0; + + // Toggle background class removal from results + bool background_removal = false; + + // Index of background class for background removal + uint32_t background_removal_index = 0; + + // Indicates whether or not NMS performs IoU over different classes for the same box. + // If set to false - NMS won't intersect different classes, and a box could have multiple labels. + bool cross_classes = false; + + // Indicates whether only bbox decoding is performed (without running NMS) + bool bbox_only = false; +}; + +static const float32_t REMOVED_CLASS_SCORE = 0.0f; + +class NmsOpMetadata : public OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const std::string &network_name, + const OperationType type, + const std::string &name); + virtual ~NmsOpMetadata() = default; + std::string get_nms_config_description(); + hailo_status validate_format_info() override; + NmsPostProcessConfig &nms_config() { return m_nms_config;}; + hailo_nms_info_t nms_info(); + std::string get_op_description() override; + static hailo_format_t expand_output_format_autos_by_op_type(const hailo_format_t &output_format, OperationType type, bool bbox_only); + + virtual Expected get_output_vstream_info() override; + +protected: + NmsOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const std::string &name, + const std::string &network_name, + const OperationType type) + : OpMetadata(inputs_metadata, outputs_metadata, name, network_name, type), + m_nms_config(nms_post_process_config) + {} + + hailo_status validate_params() override; + +private: + NmsPostProcessConfig m_nms_config; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_NET_FLOW_NMS_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops/op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp similarity index 97% rename from hailort/libhailort/src/net_flow/ops/op_metadata.hpp rename to hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp index d07a4440..44b8defb 100644 --- a/hailort/libhailort/src/net_flow/ops/op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp @@ -112,7 +112,7 @@ class OpMetadata virtual hailo_status validate_params() = 0; }; -} -} +} /* namespace net_flow */ +} /* namespace hailort */ -#endif \ No newline at end of file +#endif /* _HAILO_OP_META_DATA_HPP_ */ \ No 
newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp new file mode 100644 index 00000000..20a77997 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file softmax_op_metadata.hpp + * @brief: Softmax op metadata + * + **/ + +#ifndef _HAILO_SOFTMAX_OP_METADATA_HPP_ +#define _HAILO_SOFTMAX_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +constexpr std::size_t SOFTMAX_NUMBER_OF_SRCS {1}; +constexpr std::size_t SOFTMAX_NUMBER_OF_DSTS {1}; + +class SoftmaxOpMetadata : public OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const std::string &network_name); + std::string get_op_description() override; + hailo_status validate_format_info() override; + static hailo_format_t expand_output_format_autos(const hailo_format_t &output_format, const hailo_format_t &input_format); + + virtual Expected get_output_vstream_info() override; + +private: + SoftmaxOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const std::string &network_name) + : OpMetadata(inputs_metadata, outputs_metadata, "Softmax-Post-Process", network_name, OperationType::SOFTMAX) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_SOFTMAX_OP_METADATA_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp new file mode 100644 index 00000000..bb952357 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file ssd_op_metadata.hpp + * @brief SSD op metadata + * + **/ + +#ifndef _HAILO_SSD_OP_METADATA_HPP_ +#define _HAILO_SSD_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct SSDPostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + uint32_t centers_scale_factor = 0; + + uint32_t bbox_dimensions_scale_factor = 0; + + uint32_t ty_index = 0; + uint32_t tx_index = 0; + uint32_t th_index = 0; + uint32_t tw_index = 0; + + std::map reg_to_cls_inputs; + + // A vector of anchors, each element in the vector represents the anchors for a specific layer + // Each layer anchors vector is structured as {w,h} pairs. + // Each anchor is mapped by 2 keys: + // 1. reg input + // 2. 
cls input + std::map> anchors; + + // Indicates whether boxes should be normalized (and clipped) + bool normalize_boxes = false; +}; + +class SSDOpMetadata : public NmsOpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const SSDPostProcessConfig &ssd_post_process_config, + const std::string &network_name); + std::string get_op_description() override; + hailo_status validate_format_info() override; + SSDPostProcessConfig &ssd_config() { return m_ssd_config;}; + +private: + SSDPostProcessConfig m_ssd_config; + SSDOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const SSDPostProcessConfig &ssd_post_process_config, + const std::string &network_name) + : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "SSD-Post-Process", network_name, OperationType::SSD) + , m_ssd_config(ssd_post_process_config) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_SSD_OP_METADATA_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp new file mode 100644 index 00000000..191f6386 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp @@ -0,0 +1,48 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov5_bbox_only_op_metadata.hpp + * @brief YOLOv5 Bbox Only Post-Process op metadata + **/ + +#ifndef _HAILO_YOLOV5_BBOX_ONLY_OP_METADATA_HPP_ +#define _HAILO_YOLOV5_BBOX_ONLY_OP_METADATA_HPP_ + +#include "hailo/hailort.h" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +class Yolov5BboxOnlyOpMetadata : public Yolov5OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolov5_config, + const std::string &network_name); + hailo_status validate_format_info() override; + std::string get_op_description() override; + virtual Expected get_output_vstream_info() override; + +private: + Yolov5BboxOnlyOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolo_config, + const std::string &network_name) + : Yolov5OpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOv5Bbox-Only-Post-Process", + network_name, yolo_config, OperationType::YOLOV5) + {} + +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_YOLOV5_BBOX_ONLY_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp similarity index 91% rename from hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp rename to hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp index 145f84eb..caf70632 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp +++ 
b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp @@ -10,7 +10,7 @@ #ifndef _HAILO_YOLO_OP_METADATA_HPP_ #define _HAILO_YOLO_OP_METADATA_HPP_ -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/op_metadata.hpp" namespace hailort { @@ -41,6 +41,7 @@ class Yolov5OpMetadata : public NmsOpMetadata std::string get_op_description() override; hailo_status validate_format_info() override; YoloPostProcessConfig &yolov5_config() { return m_yolov5_config;}; + virtual Expected get_output_vstream_info() override; protected: Yolov5OpMetadata(const std::unordered_map &inputs_metadata, @@ -56,15 +57,14 @@ class Yolov5OpMetadata : public NmsOpMetadata hailo_status validate_params() override; -private: YoloPostProcessConfig m_yolov5_config; }; -} // namespace net_flow -} // namespace hailort +} /* namespace net_flow */ +} /* namespace hailort */ -#endif // _HAILO_YOLOV5_OP_METADATA_HPP_ +#endif /* _HAILO_YOLO_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp similarity index 92% rename from hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp rename to hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp index 60398356..39213a7e 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp @@ -11,7 +11,7 @@ #define _HAILO_YOLOV5_SEG_OP_METADATA_HPP_ #include "hailo/hailort.h" -#include "net_flow/ops/yolov5_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" namespace hailort { @@ -22,6 +22,7 @@ struct YoloV5SegPostProcessConfig { // User given mask threshold. A pixel will be considered part of the mask if its value is higher than the mask_threshold. double mask_threshold; + uint32_t max_accumulated_mask_size; std::string proto_layer_name; }; @@ -38,6 +39,7 @@ class Yolov5SegOpMetadata : public Yolov5OpMetadata std::string get_op_description() override; YoloV5SegPostProcessConfig &yolov5seg_config() { return m_yolo_seg_config;}; virtual Expected get_output_vstream_info() override; + hailo_status validate_params() override; private: Yolov5SegOpMetadata(const std::unordered_map &inputs_metadata, @@ -57,4 +59,4 @@ class Yolov5SegOpMetadata : public Yolov5OpMetadata } /* namespace hailort */ } /* namespace net_flow */ -#endif /* _HAILO_YOLOV5_SEG_POST_PROCESS_HPP_ */ \ No newline at end of file +#endif /* _HAILO_YOLOV5_SEG_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp new file mode 100644 index 00000000..078cbfe4 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov8_op_metadata.hpp + * @brief YOLOV8 op metadata + **/ +#ifndef _HAILO_YOLOV8_OP_METADATA_HPP_ +#define _HAILO_YOLOV8_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/nms_op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct Yolov8MatchingLayersNames +{ + // Regression layer + std::string reg; + + // Classifications layer + std::string cls; + + uint32_t stride; +}; + +struct Yolov8PostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. 
diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp new file mode 100644 index 00000000..078cbfe4 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov8_op_metadata.hpp + * @brief YOLOV8 op metadata + **/ +#ifndef _HAILO_YOLOV8_OP_METADATA_HPP_ +#define _HAILO_YOLOV8_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/nms_op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct Yolov8MatchingLayersNames +{ + // Regression layer + std::string reg; + + // Classifications layer + std::string cls; + + uint32_t stride; +}; + +struct Yolov8PostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + // A vector of two strings that represents the relations between the output names. + std::vector<Yolov8MatchingLayersNames> reg_to_cls_inputs; +}; + +class Yolov8OpMetadata : public NmsOpMetadata +{ +public: + static Expected<std::shared_ptr<OpMetadata>> create(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata, + const std::unordered_map<std::string, BufferMetaData> &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const Yolov8PostProcessConfig &yolov8_post_process_config, + const std::string &network_name); + hailo_status validate_format_info() override; + std::string get_op_description() override; + Yolov8PostProcessConfig &yolov8_config() { return m_yolov8_config;}; + +private: + Yolov8PostProcessConfig m_yolov8_config; + Yolov8OpMetadata(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata, + const std::unordered_map<std::string, BufferMetaData> &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const Yolov8PostProcessConfig &yolov8_post_process_config, + const std::string &network_name) + : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOV8-Post-Process", network_name, OperationType::YOLOV8) + , m_yolov8_config(yolov8_post_process_config) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_YOLOV8_OP_METADATA_HPP_ */ \ No newline at end of file
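Since `reg_to_cls_inputs` pairs outputs purely by name, here is a hedged sketch of filling the config above; the layer names and strides are invented for a hypothetical 640x640 model:

```cpp
#include <string>
#include <vector>

// Sketch only: pair each regression output with its classification output
// and stride, as Yolov8PostProcessConfig above expects.
hailort::net_flow::Yolov8PostProcessConfig make_example_yolov8_config()
{
    hailort::net_flow::Yolov8PostProcessConfig config{};
    config.image_height = 640;
    config.image_width = 640;
    config.reg_to_cls_inputs = {
        {"yolov8n/conv41", "yolov8n/conv42", 8},  // high-resolution head
        {"yolov8n/conv52", "yolov8n/conv53", 16},
        {"yolov8n/conv62", "yolov8n/conv63", 32}, // low-resolution head
    };
    return config;
}
```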
diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp new file mode 100644 index 00000000..4b0ada23 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp @@ -0,0 +1,72 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolox_op_metadata.hpp + * @brief YOLOX op metadata + **/ +#ifndef _HAILO_YOLOX_OP_METADATA_HPP_ +#define _HAILO_YOLOX_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct YoloxMatchingLayersNames +{ + // Regression layer + std::string reg; + + // Objectness layer + std::string obj; + + // Classifications layer + std::string cls; +}; + +struct YoloxPostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + // A vector of three strings that represents the relations between the output names. + std::vector<YoloxMatchingLayersNames> input_names; +}; + +class YoloxOpMetadata : public NmsOpMetadata +{ +public: + static Expected<std::shared_ptr<OpMetadata>> create(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata, + const std::unordered_map<std::string, BufferMetaData> &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloxPostProcessConfig &yolox_post_process_config, + const std::string &network_name); + hailo_status validate_format_info() override; + std::string get_op_description() override; + YoloxPostProcessConfig &yolox_config() { return m_yolox_config;}; + +private: + YoloxPostProcessConfig m_yolox_config; + YoloxOpMetadata(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata, + const std::unordered_map<std::string, BufferMetaData> &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloxPostProcessConfig &yolox_post_process_config, + const std::string &network_name) + : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOX-Post-Process", network_name, OperationType::YOLOX) + , m_yolox_config(yolox_post_process_config) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_YOLOX_OP_METADATA_HPP_ */ \ No newline at end of file
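YOLOX splits each head one step further, so the matching entries carry a name triplet instead of a pair. A sketch with invented names:

```cpp
#include <string>
#include <vector>

// Sketch only: every YOLOX head contributes a (regression, objectness,
// classification) triplet to input_names.
hailort::net_flow::YoloxPostProcessConfig make_example_yolox_config()
{
    hailort::net_flow::YoloxPostProcessConfig config{};
    config.image_height = 640;
    config.image_width = 640;
    config.input_names = {
        {"yolox_s/conv70", "yolox_s/conv71", "yolox_s/conv72"}, // reg, obj, cls
        {"yolox_s/conv80", "yolox_s/conv81", "yolox_s/conv82"},
        {"yolox_s/conv90", "yolox_s/conv91", "yolox_s/conv92"},
    };
    return config;
}
```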
Shutting it down"); } else { LOGGER__ERROR("Shutting down the pipeline with status {}", error_status); } @@ -131,6 +133,11 @@ const ElementBuildParams AsyncPipeline::get_build_params() return m_build_params; } +std::shared_ptr> AsyncPipeline::get_pipeline_status() +{ + return m_build_params.pipeline_status; +} + void AsyncPipeline::set_as_multi_planar() { m_is_multi_planar = true; @@ -148,7 +155,7 @@ Expected> AsyncInferRunnerImpl::create(std auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - auto async_pipeline_expected = PipelineBuilder::create_pipeline(net_group, inputs_formats, outputs_formats, timeout, pipeline_status); + auto async_pipeline_expected = AsyncPipelineBuilder::create_pipeline(net_group, inputs_formats, outputs_formats, timeout, pipeline_status); CHECK_EXPECTED(async_pipeline_expected); auto async_infer_runner_ptr = make_shared_nothrow(async_pipeline_expected.release(), pipeline_status); @@ -208,15 +215,16 @@ hailo_status AsyncInferRunnerImpl::start_pipeline() void AsyncInferRunnerImpl::abort() { + std::unique_lock lock(m_mutex); m_is_aborted = true; - m_async_pipeline->shutdown(HAILO_STREAM_ABORTED_BY_USER); + m_async_pipeline->shutdown(HAILO_STREAM_ABORT); return; } Expected AsyncInferRunnerImpl::can_push_buffers() { for (auto &last_element : m_async_pipeline->get_last_elements()) { - auto can_push_buffer = last_element.second->can_push_buffer_upstream(last_element.first); + auto can_push_buffer = last_element.second->can_push_buffer_upstream(); CHECK_EXPECTED(can_push_buffer); if (!can_push_buffer.release()) { return false; @@ -224,7 +232,7 @@ Expected AsyncInferRunnerImpl::can_push_buffers() } for (auto &entry_element : m_async_pipeline->get_entry_elements()) { - auto can_push_buffer = entry_element.second->can_push_buffer_downstream(entry_element.first); + auto can_push_buffer = entry_element.second->can_push_buffer_downstream(); CHECK_EXPECTED(can_push_buffer); if (!can_push_buffer.release()) { return false; @@ -234,28 +242,170 @@ Expected AsyncInferRunnerImpl::can_push_buffers() return true; } -hailo_status AsyncInferRunnerImpl::async_infer() +hailo_status AsyncInferRunnerImpl::set_buffers(std::unordered_map &inputs, + std::unordered_map> &outputs) +{ + for (auto &last_element : m_async_pipeline->get_last_elements()) { + // TODO: handle the non-recoverable case where one buffer is enqueued successfully and the second isn't (HRT-11783) + auto status = last_element.second->enqueue_execution_buffer(outputs.at(last_element.first).first, + outputs.at(last_element.first).second); + CHECK_SUCCESS(status); + } + + for (auto &entry_element : m_async_pipeline->get_entry_elements()) { + entry_element.second->sinks()[0].run_push_async(std::move(inputs.at(entry_element.first))); + } + + return HAILO_SUCCESS; +} + +void AsyncInferRunnerImpl::set_pix_buffer_inputs(std::unordered_map &inputs, hailo_pix_buffer_t userptr_pix_buffer, + TransferDoneCallbackAsyncInfer input_done, const std::string &input_name) +{ + if (1 == userptr_pix_buffer.number_of_planes) { + inputs[input_name] = PipelineBuffer(MemoryView(userptr_pix_buffer.planes[0].user_ptr, userptr_pix_buffer.planes[0].bytes_used), input_done); + } else if (m_async_pipeline->is_multi_planar()) { + // If model is multi-planar + inputs[input_name] = PipelineBuffer(userptr_pix_buffer, input_done); + } else { + // Other cases - return error, as on async flow we do not support copy to new buffer + LOGGER__ERROR("HEF was compiled for single input 
+ +void AsyncInferRunnerImpl::set_pix_buffer_inputs(std::unordered_map<std::string, PipelineBuffer> &inputs, hailo_pix_buffer_t userptr_pix_buffer, + TransferDoneCallbackAsyncInfer input_done, const std::string &input_name) +{ + if (1 == userptr_pix_buffer.number_of_planes) { + inputs[input_name] = PipelineBuffer(MemoryView(userptr_pix_buffer.planes[0].user_ptr, userptr_pix_buffer.planes[0].bytes_used), input_done); + } else if (m_async_pipeline->is_multi_planar()) { + // If model is multi-planar + inputs[input_name] = PipelineBuffer(userptr_pix_buffer, input_done); + } else { + // Other cases - return error, as on async flow we do not support copy to new buffer + LOGGER__ERROR("HEF was compiled for single input layer, while trying to pass non-contiguous planes buffers."); + inputs[input_name] = PipelineBuffer(HAILO_INVALID_OPERATION, input_done); + } + +} + +Expected<hailo_pix_buffer_t> AsyncInferRunnerImpl::convert_dma_pix_buffer_to_userptr_pix_buffer(const hailo_pix_buffer_t &dma_pix_buffer) +{ + hailo_pix_buffer_t userptr_pix_buffer; + userptr_pix_buffer.index = dma_pix_buffer.index; + userptr_pix_buffer.number_of_planes = dma_pix_buffer.number_of_planes; + userptr_pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; + + for (uint32_t i = 0; i < dma_pix_buffer.number_of_planes; i++ ) { + auto current_plane = dma_pix_buffer.planes[i]; + hailo_dma_buffer_t dma_buffer = {current_plane.fd, current_plane.bytes_used}; + + auto dma_buffer_memview_expected = DmaBufferUtils::mmap_dma_buffer_read(dma_buffer); + CHECK_EXPECTED_AS_STATUS(dma_buffer_memview_expected); + auto dma_buffer_memview = dma_buffer_memview_expected.release(); + + hailo_pix_buffer_plane_t new_plane; + new_plane.bytes_used = current_plane.bytes_used; + new_plane.plane_size = current_plane.plane_size; + new_plane.user_ptr = dma_buffer_memview.data(); + + userptr_pix_buffer.planes[i] = new_plane; + } + + return userptr_pix_buffer; +} + +hailo_status AsyncInferRunnerImpl::run(ConfiguredInferModel::Bindings &bindings, TransferDoneCallbackAsyncInfer transfer_done) { - hailo_status status = m_async_pipeline->get_build_params().pipeline_status->load(); + std::unique_lock<std::mutex> lock(m_mutex); + hailo_status status = m_async_pipeline->get_pipeline_status()->load(); CHECK_SUCCESS(status, "Can't handle infer request since Pipeline status is {}.", status); - auto pools_are_ready = can_push_buffers(); - CHECK_EXPECTED_AS_STATUS(pools_are_ready); - CHECK(pools_are_ready.release(), HAILO_QUEUE_IS_FULL, "Can't handle infer request since a queue in the pipeline is full."); + TRY(auto are_pools_ready, can_push_buffers()); + CHECK(are_pools_ready, HAILO_QUEUE_IS_FULL, "Can't handle infer request since a queue in the pipeline is full."); + std::unordered_map<std::string, std::pair<MemoryView, TransferDoneCallbackAsyncInfer>> outputs; for (auto &last_element : m_async_pipeline->get_last_elements()) { - assert(contains(m_output_buffers, last_element.first)); - auto output_buffer = m_output_buffers.at(last_element.first); - auto read_done = m_read_dones.at(last_element.first); - // TODO: handle the non-recoverable case where one buffer is enqueued successfully and the second isn't (HRT-11783) - status = last_element.second->enqueue_execution_buffer(output_buffer, read_done); - CHECK_SUCCESS(status); + auto buff_type = bindings.output(last_element.first)->m_pimpl->get_type(); + if (BufferType::DMA_BUFFER == buff_type) { + TRY(auto dma_buffer, bindings.output(last_element.first)->get_dma_buffer(), "Couldn't find output buffer for '{}'", last_element.first); + + TRY(auto dma_buffer_memview, DmaBufferUtils::mmap_dma_buffer_write(dma_buffer)); + + auto output_done = [dma_buffer_memview, dma_buffer=dma_buffer, transfer_done](hailo_status status) { + auto munmap_status = DmaBufferUtils::munmap_dma_buffer_write(dma_buffer, dma_buffer_memview); + if (HAILO_SUCCESS != munmap_status) { + LOGGER__ERROR("Failed to unmap dma buffer"); + status = HAILO_FILE_OPERATION_FAILURE; + } + transfer_done(status); + }; + std::pair<MemoryView, TransferDoneCallbackAsyncInfer> buffer_cb_pair(dma_buffer_memview, output_done); + outputs[last_element.first] = buffer_cb_pair; + + } else { + TRY(auto buffer, bindings.output(last_element.first)->get_buffer(), "Couldn't find output buffer for '{}'", last_element.first); + + std::pair<MemoryView, TransferDoneCallbackAsyncInfer> buffer_cb_pair(buffer, transfer_done); + outputs[last_element.first] =
buffer_cb_pair; + } } + std::unordered_map<std::string, PipelineBuffer> inputs; for (auto &entry_element : m_async_pipeline->get_entry_elements()) { + auto buff_type = bindings.input(entry_element.first)->m_pimpl->get_type(); + + switch (buff_type) { + case BufferType::VIEW: + { + TRY(auto buffer, bindings.input(entry_element.first)->get_buffer(), "Couldn't find input buffer for '{}'", entry_element.first); + inputs[entry_element.first] = PipelineBuffer(buffer, transfer_done); + break; + } + case BufferType::DMA_BUFFER: + { + TRY(auto dma_buffer, bindings.input(entry_element.first)->get_dma_buffer(), "Couldn't find input buffer for '{}'", entry_element.first); + + TRY(auto dma_buffer_memview, DmaBufferUtils::mmap_dma_buffer_read(dma_buffer)); + + auto input_done = [dma_buffer_memview, dma_buffer, transfer_done](hailo_status status) { + auto munmap_status = DmaBufferUtils::munmap_dma_buffer_read(dma_buffer, dma_buffer_memview); + if (HAILO_SUCCESS != munmap_status) { + // Note: we override the status even if it was not success before (but either way it's set to non-success) + LOGGER__ERROR("Failed to unmap dma buffer"); + status = munmap_status; + } + transfer_done(status); + }; + inputs[entry_element.first] = PipelineBuffer(dma_buffer_memview, input_done); + break; + } + case BufferType::PIX_BUFFER: + { + // TODO: handle a case in which the pix_buffer is DMA buffers (HRT-12771) + TRY(auto pix_buffer, bindings.input(entry_element.first)->get_pix_buffer(), "Couldn't find input buffer for '{}'", entry_element.first); + + if (HAILO_PIX_BUFFER_MEMORY_TYPE_DMABUF == pix_buffer.memory_type) { + TRY(auto userptr_pix_buffer, convert_dma_pix_buffer_to_userptr_pix_buffer(pix_buffer)); + + auto input_done = [userptr_pix_buffer, transfer_done, dma_pix_buffer=pix_buffer](hailo_status status) { + for (uint32_t i = 0; i < dma_pix_buffer.number_of_planes; i++ ) { + auto plane_in_dma_buffer = dma_pix_buffer.planes[i]; + hailo_dma_buffer_t dma_buffer = {plane_in_dma_buffer.fd, plane_in_dma_buffer.bytes_used}; + + auto dma_buffer_memview = MemoryView(userptr_pix_buffer.planes[i].user_ptr, userptr_pix_buffer.planes[i].bytes_used); + + auto munmap_status = DmaBufferUtils::munmap_dma_buffer_read(dma_buffer, dma_buffer_memview); + if (HAILO_SUCCESS != munmap_status) { + // Note: we override the status even if it was not success before (but either way it's set to non-success) + LOGGER__ERROR("Failed to unmap dma buffer"); + status = munmap_status; + } + } + transfer_done(status); + }; + + set_pix_buffer_inputs(inputs, userptr_pix_buffer, input_done, entry_element.first); + } else { + set_pix_buffer_inputs(inputs, pix_buffer, transfer_done, entry_element.first); + } + break; + } + + default: + CHECK(false, HAILO_NOT_FOUND, "Couldn't find input buffer for '{}'", entry_element.first); + } } + + status = set_buffers(inputs, outputs); + CHECK_SUCCESS(status); + return HAILO_SUCCESS; }
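For context, a hypothetical caller of the run() entry point above. Layer names are placeholders, and set_dma_buffer()/set_pix_buffer() are only assumed to exist as the binding-side counterparts of the switch cases above:

```cpp
// Sketch only: bind plain host memory and submit one async job.
hailo_status infer_once(AsyncInferRunnerImpl &runner, ConfiguredInferModel::Bindings &bindings,
    MemoryView input_view, MemoryView output_view)
{
    auto status = bindings.input("input_layer")->set_buffer(input_view);
    CHECK_SUCCESS(status);
    status = bindings.output("output_layer")->set_buffer(output_view);
    CHECK_SUCCESS(status);

    // run() inspects the bound buffer types, maps what needs mapping and
    // pushes one job through the pipeline; the callback fires on completion.
    return runner.run(bindings, [](hailo_status infer_status) {
        if (HAILO_SUCCESS != infer_status) {
            LOGGER__ERROR("Inference failed with status {}", infer_status);
        }
    });
}
```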
@@ -284,32 +434,6 @@ std::unordered_map<std::string, std::shared_ptr<PipelineElement>> AsyncInferRunn return m_async_pipeline->get_last_elements(); } -void AsyncInferRunnerImpl::set_input(const std::string &input_name, MemoryView &&input_buffer, TransferDoneCallbackAsyncInfer &write_done) -{ - m_input_buffers[input_name] = PipelineBuffer(std::move(input_buffer), write_done); -} - -void AsyncInferRunnerImpl::set_input(const std::string &input_name, hailo_pix_buffer_t input_buffer, TransferDoneCallbackAsyncInfer &write_done) -{ - // If only one plane is passed, address it as memview - if (1 == input_buffer.number_of_planes) { - m_input_buffers[input_name] = PipelineBuffer(MemoryView(input_buffer.planes[0].user_ptr, input_buffer.planes[0].bytes_used), write_done); - } else if (m_async_pipeline->is_multi_planar()) { - // If model is multi-planar - m_input_buffers[input_name] = PipelineBuffer(std::move(input_buffer), write_done); - } else { - // Other cases - return error, as on async flow we do not support copy to new buffer - LOGGER__ERROR("HEF was compiled for single input layer, while trying to pass non-contiguous planes buffers."); - m_input_buffers[input_name] = PipelineBuffer(HAILO_INVALID_OPERATION, write_done); - } -} - -void AsyncInferRunnerImpl::set_output(const std::string &output_name, MemoryView &&output_buffer, TransferDoneCallbackAsyncInfer &read_done) -{ - m_output_buffers[output_name] = std::move(output_buffer); - m_read_dones[output_name] = read_done; -} - std::vector<std::shared_ptr<PipelineElement>> AsyncInferRunnerImpl::get_pipeline() const { return m_async_pipeline->get_pipeline(); diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp index 3467677f..2d0db048 100644 --- a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp @@ -10,9 +10,10 @@ #ifndef _HAILO_ASYNC_INFER_RUNNER_HPP_ #define _HAILO_ASYNC_INFER_RUNNER_HPP_ +#include "hailo/infer_model.hpp" #include "network_group/network_group_internal.hpp" #include "net_flow/pipeline/pipeline.hpp" -#include "net_flow/pipeline/pipeline_builder.hpp" +#include "net_flow/pipeline/async_pipeline_builder.hpp" #include "net_flow/pipeline/vstream_internal.hpp" #include "net_flow/ops/op.hpp" @@ -39,6 +40,8 @@ class AsyncPipeline const std::unordered_map<std::string, std::shared_ptr<PipelineElement>>& get_last_elements() const; const std::shared_ptr<AsyncHwElement> get_async_hw_element(); const ElementBuildParams get_build_params(); + std::shared_ptr<std::atomic<hailo_status>> get_pipeline_status(); + void set_as_multi_planar(); bool is_multi_planar(); @@ -64,21 +67,18 @@ class AsyncInferRunnerImpl virtual ~AsyncInferRunnerImpl(); AsyncInferRunnerImpl(std::shared_ptr<AsyncPipeline> async_pipeline, std::shared_ptr<std::atomic<hailo_status>> pipeline_status); - hailo_status async_infer(); + hailo_status run(ConfiguredInferModel::Bindings &bindings, TransferDoneCallbackAsyncInfer transfer_done); + hailo_status set_buffers(std::unordered_map<std::string, PipelineBuffer> &inputs, + std::unordered_map<std::string, std::pair<MemoryView, TransferDoneCallbackAsyncInfer>> &outputs); void abort(); Expected<bool> can_push_buffers(); - // TODO: consider removing the methods below (needed for unit testing) void add_element_to_pipeline(std::shared_ptr<PipelineElement> pipeline_element); void add_entry_element(std::shared_ptr<PipelineElement> pipeline_element, const std::string &input_name); void add_last_element(std::shared_ptr<PipelineElement> pipeline_element, const std::string &output_name); - void set_input(const std::string &input_name, MemoryView &&input_buffer, TransferDoneCallbackAsyncInfer &write_done); - void set_input(const std::string &input_name, hailo_pix_buffer_t input_buffer, TransferDoneCallbackAsyncInfer &write_done); - void set_output(const std::string &output_name, MemoryView &&output_buffer, TransferDoneCallbackAsyncInfer &read_done); - std::unordered_map<std::string, std::shared_ptr<PipelineElement>> get_entry_elements(); std::unordered_map<std::string, std::shared_ptr<PipelineElement>> get_last_elements(); @@ -91,13 +91,15 @@ class AsyncInferRunnerImpl hailo_status start_pipeline(); hailo_status stop_pipeline(); + static Expected<hailo_pix_buffer_t> convert_dma_pix_buffer_to_userptr_pix_buffer(const hailo_pix_buffer_t &dma_pix_buffer); + void set_pix_buffer_inputs(std::unordered_map<std::string, PipelineBuffer> &inputs, hailo_pix_buffer_t userptr_pix_buffer, +
TransferDoneCallbackAsyncInfer input_done, const std::string &input_name); + + std::shared_ptr<AsyncPipeline> m_async_pipeline; - std::unordered_map<std::string, PipelineBuffer> m_input_buffers; - std::unordered_map<std::string, MemoryView> m_output_buffers; - std::unordered_map<std::string, TransferDoneCallbackAsyncInfer> m_read_dones; volatile bool m_is_activated; volatile bool m_is_aborted; std::shared_ptr<std::atomic<hailo_status>> m_pipeline_status; + std::mutex m_mutex; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp similarity index 74% rename from hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp rename to hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp index 6a6615c2..72f88a30 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp @@ -3,25 +3,26 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file pipeline_builder.cpp + * @file async_pipeline_builder.cpp * @brief Async pipeline builder impl **/ -#include "pipeline_builder.hpp" +#include "async_pipeline_builder.hpp" #include "hailo/hailort.h" #include "net_flow/ops/yolov5_seg_post_process.hpp" +#include "net_flow/ops/yolov5_bbox_only_post_process.hpp" #include "net_flow/ops/yolov8_post_process.hpp" #include "net_flow/ops/argmax_post_process.hpp" #include "net_flow/ops/softmax_post_process.hpp" #include "net_flow/ops/yolox_post_process.hpp" #include "net_flow/ops/ssd_post_process.hpp" - +#include "net_flow/pipeline/vstream_builder.hpp" #include namespace hailort { -Expected<std::unordered_map<std::string, hailo_format_t>> PipelineBuilder::expand_auto_input_formats(std::shared_ptr<ConfiguredNetworkGroup> net_group, +Expected<std::unordered_map<std::string, hailo_format_t>> AsyncPipelineBuilder::expand_auto_input_formats(std::shared_ptr<ConfiguredNetworkGroup> net_group, const std::unordered_map<std::string, hailo_format_t> &inputs_formats, const std::unordered_map<std::string, hailo_stream_info_t> &named_stream_infos) { std::unordered_map<std::string, hailo_format_t> expanded_input_format; @@ -52,7 +53,7 @@ return expanded_input_format; } -Expected<std::unordered_map<std::string, hailo_format_t>> PipelineBuilder::expand_auto_output_formats(std::shared_ptr<ConfiguredNetworkGroup> net_group, +Expected<std::unordered_map<std::string, hailo_format_t>> AsyncPipelineBuilder::expand_auto_output_formats(std::shared_ptr<ConfiguredNetworkGroup> net_group, const std::unordered_map<std::string, hailo_format_t> &outputs_formats, const std::unordered_map<std::string, hailo_stream_info_t> &named_stream_infos) { std::unordered_map<std::string, hailo_format_t> expanded_output_format; @@ -71,7 +72,7 @@ return expanded_output_format; } -hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared_ptr<ConfiguredNetworkGroup> net_group, +hailo_status AsyncPipelineBuilder::create_pre_async_hw_elements_per_input(std::shared_ptr<ConfiguredNetworkGroup> net_group, const std::vector<std::string> &stream_names, const std::unordered_map<std::string, hailo_format_t> &inputs_formats, const std::unordered_map<std::string, hailo_stream_info_t> &named_stream_infos, std::shared_ptr<AsyncPipeline> async_pipeline) { @@ -82,8 +83,11 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared std::shared_ptr<PixBufferElement> multi_plane_splitter = nullptr; std::shared_ptr<PipelineElement> last_element_connected_to_pipeline = nullptr; - auto entry_queue_elem_expected = add_push_queue_element(PipelineObject::create_element_name("EntryPushQueueElement", vstream_name, 0), - async_pipeline, nullptr, 0); + auto is_empty = true; + auto interacts_with_hw = true; // We want the entry queue size to be the size of the queues that interact with HW + auto is_entry = true; + auto entry_queue_elem_expected = add_push_queue_element(PipelineObject::create_element_name("EntryPushQEl", vstream_name, 0), + async_pipeline, 0, is_empty, interacts_with_hw, nullptr, 0, is_entry); CHECK_EXPECTED_AS_STATUS(entry_queue_elem_expected); auto entry_queue_elem =
entry_queue_elem_expected.release(); async_pipeline->add_entry_element(entry_queue_elem, vstream_name); @@ -114,9 +118,11 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared auto sink_index = static_cast(sink_index_expected.release()); if(is_multi_planar) { + is_empty = true; + interacts_with_hw = false; auto post_split_push_queue = add_push_queue_element( - PipelineObject::create_element_name("PostSplitPushQueue", stream_name, sink_index), - async_pipeline, nullptr); + PipelineObject::create_element_name("PostSplitPushQEl", stream_name, sink_index), + async_pipeline, 0, is_empty, interacts_with_hw, nullptr); CHECK_EXPECTED_AS_STATUS(post_split_push_queue); CHECK_SUCCESS(PipelinePad::link_pads(multi_plane_splitter, post_split_push_queue.value(), plane_index++)); @@ -133,17 +139,18 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared CHECK_EXPECTED_AS_STATUS(should_transform); if (should_transform.value()) { - bool is_dma_able = true; auto pre_infer_elem = PreInferElement::create(input_stream_info.shape, src_format, input_stream_info.hw_shape, input_stream_info.format, { input_stream_info.quant_info }, - PipelineObject::create_element_name("PreInferElement", stream_name, input_stream_info.index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_dma_able, async_pipeline); + PipelineObject::create_element_name("PreInferEl", stream_name, input_stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(pre_infer_elem); async_pipeline->add_element_to_pipeline(pre_infer_elem.value()); CHECK_SUCCESS(PipelinePad::link_pads(last_element_connected_to_pipeline, pre_infer_elem.value())); - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement", stream_name, input_stream_info.index), - async_pipeline, pre_infer_elem.value()); + is_empty = false; + interacts_with_hw = true; + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl", stream_name, input_stream_info.index), + async_pipeline, input_stream_info.hw_frame_size, is_empty, interacts_with_hw, pre_infer_elem.value()); CHECK_EXPECTED_AS_STATUS(queue_elem); CHECK_SUCCESS(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), async_pipeline->get_async_hw_element(), 0, sink_index)); @@ -155,7 +162,7 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared return HAILO_SUCCESS; } -hailo_status PipelineBuilder::create_pre_async_hw_elements(std::shared_ptr net_group, +hailo_status AsyncPipelineBuilder::create_pre_async_hw_elements(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &named_stream_infos, std::shared_ptr async_pipeline) { @@ -170,19 +177,24 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements(std::shared_ptr> PipelineBuilder::add_post_infer_element(const hailo_format_t &output_format, +Expected> AsyncPipelineBuilder::add_post_infer_element(const hailo_format_t &output_format, const hailo_nms_info_t &nms_info, std::shared_ptr async_pipeline, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, - bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index) + std::shared_ptr final_elem, const uint32_t final_elem_source_index) { - auto 
queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement", final_elem->name(), static_cast(final_elem_source_index)), - async_pipeline, final_elem, final_elem_source_index); + auto pre_transform_frame_size = (HailoRTCommon::is_nms(src_format.order)) ? + HailoRTCommon::get_nms_hw_frame_size(nms_info) : HailoRTCommon::get_periph_frame_size(src_image_shape, src_format); + auto is_empty = false; + auto interacts_with_hw = true; + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl", final_elem->name(), + static_cast(final_elem_source_index)), async_pipeline, pre_transform_frame_size, is_empty, interacts_with_hw, + final_elem, final_elem_source_index); CHECK_EXPECTED(queue_elem); auto post_infer_elem = PostInferElement::create(src_image_shape, src_format, dst_image_shape, output_format, - dst_quant_infos, nms_info, PipelineObject::create_element_name("PostInferElement", + dst_quant_infos, nms_info, PipelineObject::create_element_name("PostInferEl", final_elem->name(), static_cast(final_elem_source_index)), async_pipeline->get_build_params(), - PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(post_infer_elem); async_pipeline->add_element_to_pipeline(post_infer_elem.value()); @@ -191,10 +203,11 @@ Expected> PipelineBuilder::add_post_infer_elem return post_infer_elem.release(); } -Expected> PipelineBuilder::add_push_queue_element(const std::string &queue_name, std::shared_ptr async_pipeline, - std::shared_ptr final_elem, const uint32_t final_elem_source_index) +Expected> AsyncPipelineBuilder::add_push_queue_element(const std::string &queue_name, std::shared_ptr async_pipeline, + size_t frame_size, bool is_empty, bool interacts_with_hw, std::shared_ptr final_elem, const uint32_t final_elem_source_index, bool is_entry) { - auto push_queue_elem = AsyncPushQueueElement::create(queue_name, async_pipeline->get_build_params(), async_pipeline, PipelineDirection::PUSH); + auto push_queue_elem = AsyncPushQueueElement::create(queue_name, async_pipeline->get_build_params(), frame_size, + is_empty, interacts_with_hw, async_pipeline, is_entry); CHECK_EXPECTED(push_queue_elem); async_pipeline->add_element_to_pipeline(push_queue_elem.value()); @@ -207,16 +220,16 @@ Expected> PipelineBuilder::add_push_queue return push_queue_elem.release(); } -Expected> PipelineBuilder::add_nms_to_detections_convert_element(std::shared_ptr async_pipeline, +Expected> AsyncPipelineBuilder::add_nms_to_detections_convert_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) + std::shared_ptr final_elem, const uint32_t final_elem_index) { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); auto nms_to_detections_element = ConvertNmsToDetectionsElement::create(metadata->nms_info(), PipelineObject::create_element_name(element_name, output_stream_name, stream_index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(nms_to_detections_element); async_pipeline->add_element_to_pipeline(nms_to_detections_element.value()); @@ -225,16 +238,16 @@ Expected> PipelineBuilder::add_nm return 
nms_to_detections_element.release(); } -Expected> PipelineBuilder::add_remove_overlapping_bboxes_element(std::shared_ptr async_pipeline, +Expected> AsyncPipelineBuilder::add_remove_overlapping_bboxes_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) + std::shared_ptr final_elem, const uint32_t final_elem_index) { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); auto remove_overlapping_bboxes_element = RemoveOverlappingBboxesElement::create(metadata->nms_config(), PipelineObject::create_element_name(element_name, output_stream_name, stream_index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(remove_overlapping_bboxes_element); async_pipeline->add_element_to_pipeline(remove_overlapping_bboxes_element.value()); @@ -243,16 +256,16 @@ Expected> PipelineBuilder::add_r return remove_overlapping_bboxes_element; } -Expected> PipelineBuilder::add_fill_nms_format_element(std::shared_ptr async_pipeline, +Expected> AsyncPipelineBuilder::add_fill_nms_format_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const hailo_format_t &output_format, const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) + std::shared_ptr final_elem, const uint32_t final_elem_index) { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); - auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_info(), output_format, metadata->nms_config(), + auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_config(), PipelineObject::create_element_name(element_name, output_stream_name, stream_index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(fill_nms_format_element); async_pipeline->add_element_to_pipeline(fill_nms_format_element.value()); @@ -261,11 +274,11 @@ Expected> PipelineBuilder::add_fill_nms_fo return fill_nms_format_element; } -Expected> PipelineBuilder::add_last_async_element(std::shared_ptr async_pipeline, - const std::string &output_format_name, std::shared_ptr final_elem, const uint32_t final_elem_source_index) +Expected> AsyncPipelineBuilder::add_last_async_element(std::shared_ptr async_pipeline, + const std::string &output_format_name, size_t frame_size, std::shared_ptr final_elem, const uint32_t final_elem_source_index) { - auto last_async_element = LastAsyncElement::create(PipelineObject::create_element_name("LastAsyncElement", - final_elem->name(), static_cast(final_elem_source_index)), async_pipeline->get_build_params(), async_pipeline); + auto last_async_element = LastAsyncElement::create(PipelineObject::create_element_name("LastAsyncEl", + final_elem->name(), static_cast(final_elem_source_index)), async_pipeline->get_build_params(), frame_size, async_pipeline); CHECK_EXPECTED(last_async_element); async_pipeline->add_element_to_pipeline(last_async_element.value()); @@ -276,7 +289,7 @@ Expected> 
PipelineBuilder::add_last_async_elem return last_async_element.release(); } -Expected> PipelineBuilder::get_output_format_from_edge_info_name(const std::string &edge_info_name, +Expected> AsyncPipelineBuilder::get_output_format_from_edge_info_name(const std::string &edge_info_name, const std::unordered_map &outputs_formats) { for (auto &output_format : outputs_formats) { @@ -287,19 +300,21 @@ Expected> PipelineBuilder::get_output_for return make_unexpected(HAILO_NOT_FOUND); } -hailo_status PipelineBuilder::add_output_demux_flow(const std::string &output_stream_name, std::shared_ptr async_pipeline, +hailo_status AsyncPipelineBuilder::add_output_demux_flow(const std::string &output_stream_name, std::shared_ptr async_pipeline, const std::unordered_map &outputs_formats, std::shared_ptr net_group, const std::unordered_map &named_stream_infos) { + CHECK(contains(named_stream_infos, output_stream_name), HAILO_INTERNAL_FAILURE); + const auto &stream_info = named_stream_infos.at(output_stream_name); + auto source_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_name); CHECK_EXPECTED_AS_STATUS(source_index); - const bool is_dma_able_hw_async = true; - // async_hw element must be filled with buffers like an edge element in order to support large batch sizes - auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *source_index); - CHECK_SUCCESS(status); - CHECK(contains(named_stream_infos, output_stream_name), HAILO_INTERNAL_FAILURE); - const auto &stream_info = named_stream_infos.at(output_stream_name); + auto is_empty = false; + auto interacts_with_hw = true; + auto hw_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_post_hw", stream_info.name, stream_info.index), + async_pipeline, stream_info.hw_frame_size, is_empty, interacts_with_hw, async_pipeline->get_async_hw_element(), *source_index); + CHECK_EXPECTED_AS_STATUS(hw_queue_elem); auto layer_info = net_group->get_layer_info(output_stream_name); CHECK_EXPECTED_AS_STATUS(layer_info); @@ -311,20 +326,22 @@ hailo_status PipelineBuilder::add_output_demux_flow(const std::string &output_st CHECK_ARG_NOT_NULL(demuxer_ptr); auto demux_elem = TransformDemuxElement::create(demuxer_ptr, - PipelineObject::create_element_name("TransformDemuxElement", output_stream_name, stream_info.index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + PipelineObject::create_element_name("TransformDemuxEl", output_stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(demux_elem); async_pipeline->add_element_to_pipeline(demux_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(async_pipeline->get_async_hw_element(), demux_elem.value(), *source_index, 0)); + CHECK_SUCCESS(PipelinePad::link_pads(hw_queue_elem.value(), demux_elem.value())); uint8_t i = 0; for (auto &edge_info : demuxer_ptr->get_edges_stream_info()) { auto output_format_expected = get_output_format_from_edge_info_name(edge_info.name, outputs_formats); CHECK_EXPECTED_AS_STATUS(output_format_expected); - auto demux_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_demux", edge_info.name, i), async_pipeline, - demux_elem.value(), i); + is_empty = false; + interacts_with_hw = false; + auto demux_queue_elem = 
add_push_queue_element(PipelineObject::create_element_name("PushQEl_demux", edge_info.name, i), async_pipeline, + edge_info.hw_frame_size, is_empty, interacts_with_hw, demux_elem.value(), i); CHECK_EXPECTED_AS_STATUS(demux_queue_elem); auto should_transform = OutputTransformContext::is_transformation_required(edge_info.hw_shape, @@ -332,17 +349,20 @@ hailo_status PipelineBuilder::add_output_demux_flow(const std::string &output_st CHECK_EXPECTED_AS_STATUS(should_transform); if (should_transform.value()) { - status = demux_elem.value()->fill_buffer_pool(false, async_pipeline->get_build_params().buffer_pool_size_internal, i); - CHECK_SUCCESS(status); - auto post_infer_elem = add_post_infer_element(output_format_expected.value().second, edge_info.nms_info, - async_pipeline, edge_info.hw_shape, edge_info.format, edge_info.shape, {edge_info.quant_info}, true, demux_queue_elem.value()); + auto post_infer_elem = add_post_infer_element(output_format_expected.value().second, edge_info.nms_info, + async_pipeline, edge_info.hw_shape, edge_info.format, edge_info.shape, {edge_info.quant_info}, demux_queue_elem.value()); CHECK_EXPECTED_AS_STATUS(post_infer_elem); - auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, post_infer_elem.value()); + auto post_transform_frame_size = (HailoRTCommon::is_nms(edge_info.format.order)) ? + HailoRTCommon::get_nms_host_frame_size(edge_info.nms_info, output_format_expected.value().second) : + HailoRTCommon::get_frame_size(edge_info.shape, output_format_expected.value().second); + + auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, post_transform_frame_size, + post_infer_elem.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); } else { - auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, demux_queue_elem.value()); + auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, edge_info.hw_frame_size, + demux_queue_elem.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); } i++; @@ -350,7 +370,7 @@ return HAILO_SUCCESS; } -Expected<bool> PipelineBuilder::should_transform(const hailo_stream_info_t &stream_info, const std::vector<hailo_quant_info_t> &stream_quant_infos, +Expected<bool> AsyncPipelineBuilder::should_transform(const hailo_stream_info_t &stream_info, const std::vector<hailo_quant_info_t> &stream_quant_infos, const hailo_format_t &output_format) { auto should_transform = OutputTransformContext::is_transformation_required(stream_info.hw_shape, @@ -359,19 +379,10 @@ return should_transform.release(); }
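The ternary above is a pattern this builder now repeats at several call sites: buffer and queue sizes are derived from the layer's format order. A condensed sketch of the rule, using the HailoRTCommon helpers already referenced in this diff (the wrapper function itself is invented):

```cpp
// Sketch only: the frame-size rule used across this builder. NMS layers get
// their sizes from the NMS metadata; everything else from shape and format.
size_t post_transform_frame_size(const hailo_3d_image_shape_t &shape, const hailo_format_t &host_format,
    const hailo_nms_info_t &nms_info, const hailo_format_t &hw_format)
{
    return HailoRTCommon::is_nms(hw_format.order) ?
        HailoRTCommon::get_nms_host_frame_size(nms_info, host_format) :
        HailoRTCommon::get_frame_size(shape, host_format);
}
```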
-hailo_status PipelineBuilder::add_nms_fuse_flow(const std::vector<std::string> &output_streams_names, +hailo_status AsyncPipelineBuilder::add_nms_fuse_flow(const std::vector<std::string> &output_streams_names, const std::pair<std::string, hailo_format_t> &output_format, std::shared_ptr<AsyncPipeline> async_pipeline, const std::unordered_map<std::string, hailo_stream_info_t> &named_stream_infos) { - const bool is_dma_able_hw_async = true; - for (const auto &stream_name : output_streams_names) { - auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); - CHECK_EXPECTED_AS_STATUS(output_index); - // async_hw element must be filled with buffers like an edge element in order to support large batch sizes - auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *output_index); - CHECK_SUCCESS(status); - } - std::vector<hailo_nms_info_t> nms_infos; nms_infos.reserve(output_streams_names.size()); hailo_stream_info_t first_defused_stream_info = {}; @@ -388,9 +399,8 @@ hailo_status PipelineBuilder::add_nms_fuse_flow(const std::vector & // To get the fused layer name and src stream format, we use the stream info of one of the defuses auto fused_layer_name = first_defused_stream_info.nms_info.defuse_info.original_name; - bool is_last_copy_element = true; - auto nms_elem = NmsMuxElement::create(nms_infos, PipelineObject::create_element_name("NmsMuxElement", fused_layer_name, 0), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + auto nms_elem = NmsMuxElement::create(nms_infos, PipelineObject::create_element_name("NmsMuxEl", fused_layer_name, 0), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(nms_elem); async_pipeline->add_element_to_pipeline(nms_elem.value()); @@ -403,8 +413,11 @@ auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); CHECK_EXPECTED_AS_STATUS(output_index); - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - async_pipeline, async_pipeline->get_async_hw_element(), output_index.value()); + auto is_empty = false; + auto interacts_with_hw = true; + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl_nms", curr_stream_info.name, curr_stream_info.index), + async_pipeline, curr_stream_info.hw_frame_size, is_empty, interacts_with_hw, + async_pipeline->get_async_hw_element(), output_index.value()); CHECK_EXPECTED_AS_STATUS(queue_elem); CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), nms_elem.value(), 0, i)); @@ -414,30 +427,23 @@ // TODO(HRT-11078): Fix multi qp for fused NMS auto stream_quant_infos = std::vector<hailo_quant_info_t>(1, first_defused_stream_info.quant_info); - auto should_transform_expected = should_transform(first_defused_stream_info, stream_quant_infos, output_format.second); - CHECK_EXPECTED_AS_STATUS(should_transform_expected); - - if (should_transform_expected.value()) { - auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); + // On NMS models we always need to post-infer + auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); - hailo_status status = nms_elem.value()->fill_buffer_pool(false, async_pipeline->get_build_params().buffer_pool_size_internal, first_defused_stream_info.name); - CHECK_SUCCESS(status); + auto post_infer_elem = add_post_infer_element(output_format.second, fused_layer_nms_info, async_pipeline, + first_defused_stream_info.hw_shape, first_defused_stream_info.format, first_defused_stream_info.shape, stream_quant_infos, nms_elem.value()); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); - auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(fused_layer_nms_info, output_format.second); - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_infer_elem.value()); -
CHECK_EXPECTED_AS_STATUS(last_async_element); - } else { - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, nms_elem.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); - } + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, + post_infer_elem.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_softmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_softmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, const std::unordered_map &named_stream_infos) { @@ -459,11 +465,14 @@ hailo_status PipelineBuilder::add_softmax_flow(std::shared_ptr as auto stream_quant_infos = std::vector(1, stream_info.quant_info); auto post_infer_elem = add_post_infer_element(output_format_expanded, {}, async_pipeline, stream_info.hw_shape, stream_info.format, - stream_info.shape, stream_quant_infos, false, async_pipeline->get_async_hw_element(), hw_async_elem_index.value()); + stream_info.shape, stream_quant_infos, async_pipeline->get_async_hw_element(), hw_async_elem_index.value()); CHECK_EXPECTED_AS_STATUS(post_infer_elem); - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_softmax", async_pipeline->get_async_hw_element()->name(), - static_cast(hw_async_elem_index.value())), async_pipeline, post_infer_elem.value()); + auto is_empty = false; + auto interacts_with_hw = false; + const auto post_transform_frame_size = HailoRTCommon::get_frame_size(stream_info.shape, output_format_expanded); + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl_softmax", async_pipeline->get_async_hw_element()->name(), + static_cast(hw_async_elem_index.value())), async_pipeline, post_transform_frame_size, is_empty, interacts_with_hw, post_infer_elem.value()); CHECK_EXPECTED_AS_STATUS(queue_elem); // Updating metadata according to user request @@ -483,20 +492,21 @@ hailo_status PipelineBuilder::add_softmax_flow(std::shared_ptr as auto softmax_op = op_expected.release(); auto softmax_element = SoftmaxPostProcessElement::create(softmax_op, - PipelineObject::create_element_name("SoftmaxPostProcessElement", stream_name, stream_info.index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + PipelineObject::create_element_name("SoftmaxPPEl", stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(softmax_element); async_pipeline->add_element_to_pipeline(softmax_element.value()); CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), softmax_element.value())); - auto last_async_element = add_last_async_element(async_pipeline, updated_output_format.first, softmax_element.value()); + auto last_async_element = add_last_async_element(async_pipeline, updated_output_format.first, post_transform_frame_size, + softmax_element.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_argmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_argmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const 
net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, const std::unordered_map &named_stream_infos) { @@ -509,14 +519,17 @@ hailo_status PipelineBuilder::add_argmax_flow(std::shared_ptr asy auto hw_async_elem_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); CHECK_EXPECTED_AS_STATUS(hw_async_elem_index); - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_argmax", async_pipeline->get_async_hw_element()->name(), - static_cast(hw_async_elem_index.value())), async_pipeline, async_pipeline->get_async_hw_element()); + auto is_empty = false; + auto interacts_with_hw = true; + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl_argmax", async_pipeline->get_async_hw_element()->name(), + static_cast(hw_async_elem_index.value())), async_pipeline, stream_info.hw_frame_size, is_empty, interacts_with_hw, + async_pipeline->get_async_hw_element(), hw_async_elem_index.value()); CHECK_EXPECTED_AS_STATUS(queue_elem); // Updating metadata according to user request auto op_input_format = argmax_op_metadata->inputs_metadata().begin()->second.format; auto updated_outputs_metadata = argmax_op_metadata.get()->outputs_metadata(); - updated_outputs_metadata.begin()->second.format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(output_format.second, op_input_format);; + updated_outputs_metadata.begin()->second.format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(output_format.second, op_input_format); auto metadata = std::dynamic_pointer_cast(argmax_op_metadata); assert(nullptr != metadata); metadata->set_outputs_metadata(updated_outputs_metadata); @@ -525,23 +538,26 @@ hailo_status PipelineBuilder::add_argmax_flow(std::shared_ptr asy auto op_expected = net_flow::ArgmaxPostProcessOp::create(metadata); CHECK_EXPECTED_AS_STATUS(op_expected); auto argmax_op = op_expected.release(); - bool is_last_copy_element = true; auto argmax_element = ArgmaxPostProcessElement::create(argmax_op, - PipelineObject::create_element_name("ArgmaxPostProcessElement", stream_name, stream_info.index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + PipelineObject::create_element_name("ArgmaxPPEl", stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(argmax_element); async_pipeline->add_element_to_pipeline(argmax_element.value()); CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), argmax_element.value())); - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, argmax_element.value()); + const auto post_transform_frame_size = HailoRTCommon::get_frame_size(updated_outputs_metadata.begin()->second.shape, + updated_outputs_metadata.begin()->second.format); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, + argmax_element.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_nms_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_nms_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const std::shared_ptr &nms_op, const hailo_vstream_info_t &vstream_info, const std::unordered_map &named_stream_infos) { @@ -549,10 +565,15 @@ hailo_status 
PipelineBuilder::add_nms_flow(std::shared_ptr async_ CHECK(contains(named_stream_infos, first_stream_name), HAILO_INTERNAL_FAILURE); const auto &first_stream_info = named_stream_infos.at(first_stream_name); + auto nms_op_metadata = std::dynamic_pointer_cast(nms_op->metadata()); + assert(nullptr != nms_op_metadata); + CHECK(output_format.second.type == HAILO_FORMAT_TYPE_FLOAT32, HAILO_INVALID_ARGUMENT, "NMS output format type must be HAILO_FORMAT_TYPE_FLOAT32"); - CHECK(HailoRTCommon::is_nms(output_format.second.order), HAILO_INVALID_ARGUMENT, - "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); + if(!nms_op_metadata->nms_config().bbox_only){ + CHECK(HailoRTCommon::is_nms(output_format.second.order), HAILO_INVALID_ARGUMENT, + "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); + } std::unordered_map inputs_metadata; std::unordered_map outputs_metadata; @@ -580,8 +601,8 @@ hailo_status PipelineBuilder::add_nms_flow(std::shared_ptr async_ }; outputs_metadata.insert({nms_op->outputs_metadata().begin()->first, output_metadata}); - auto nms_elem = NmsPostProcessMuxElement::create(nms_op, PipelineObject::create_element_name("NmsPostProcessMuxElement", nms_op->get_name(), 0), - async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + auto nms_elem = NmsPostProcessMuxElement::create(nms_op, PipelineObject::create_element_name("NmsPPMuxEl", nms_op->get_name(), 0), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(nms_elem); async_pipeline->add_element_to_pipeline(nms_elem.value()); @@ -608,20 +629,30 @@ hailo_status PipelineBuilder::add_nms_flow(std::shared_ptr async_ auto source_id = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(curr_stream_name); CHECK_EXPECTED_AS_STATUS(source_id); - auto nms_source_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - async_pipeline, async_pipeline->get_async_hw_element(), source_id.value()); + auto is_empty = false; + auto interacts_with_hw = true; + auto nms_source_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl_nms", curr_stream_info.name, + curr_stream_info.index), async_pipeline, curr_stream_info.hw_frame_size, is_empty, interacts_with_hw, + async_pipeline->get_async_hw_element(), source_id.value()); CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); nms_elem.value()->add_sink_name(curr_stream_name); } - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, nms_elem.value()); + uint32_t post_transform_frame_size; + if(nms_op_metadata->nms_config().bbox_only){ + post_transform_frame_size = HailoRTCommon::get_frame_size(vstream_info, output_format.second); + } else { + post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(vstream_info.nms_shape, output_format.second); + } + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, + nms_elem.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_iou_flow( std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_iou_flow( std::shared_ptr 
async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, const std::unordered_map &named_stream_infos) { @@ -630,58 +661,62 @@ hailo_status PipelineBuilder::add_iou_flow( std::shared_ptr async CHECK(contains(named_stream_infos, output_stream_name), HAILO_INTERNAL_FAILURE); const auto &output_stream_info = named_stream_infos.at(output_stream_name); - auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_name); - CHECK_EXPECTED_AS_STATUS(output_index); - - auto hw_read_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_hw_read", output_stream_name, output_stream_info.index), - async_pipeline, async_pipeline->get_async_hw_element() , output_index.value()); - CHECK_EXPECTED_AS_STATUS(hw_read_queue_element); - // TODO (HRT-11078): Fix multi qp for PP auto stream_quant_infos = std::vector(1, output_stream_info.quant_info); //output_stream_base->get_quant_infos(); auto post_infer_element = add_post_infer_element(output_format.second, output_stream_info.nms_info, - async_pipeline, output_stream_info.hw_shape, output_stream_info.format, output_stream_info.shape, stream_quant_infos, false, hw_read_queue_element.value()); + async_pipeline, output_stream_info.hw_shape, output_stream_info.format, output_stream_info.shape, stream_quant_infos, + async_pipeline->get_async_hw_element()); CHECK_EXPECTED_AS_STATUS(post_infer_element); - auto pre_nms_convert_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_nms_convert", output_stream_name, - output_stream_info.index), async_pipeline, post_infer_element.value()); + auto is_empty = false; + auto interacts_with_hw = false; + const auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(output_stream_info.nms_info, output_format.second); + auto pre_nms_convert_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQEl_pre_nms_convert", output_stream_name, + output_stream_info.index), async_pipeline, post_transform_frame_size, is_empty, interacts_with_hw, post_infer_element.value()); CHECK_EXPECTED_AS_STATUS(pre_nms_convert_queue_element); auto nms_to_detections_element = add_nms_to_detections_convert_element(async_pipeline, output_stream_name, output_stream_info.index, - "NmsFormatToDetectionsElement", iou_op_metadata, false, pre_nms_convert_queue_element.value()); + "NmsFormatToDetectionsEl", iou_op_metadata, pre_nms_convert_queue_element.value()); CHECK_EXPECTED_AS_STATUS(nms_to_detections_element); - auto pre_remove_overlapping_bboxes_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_bboxes_removing", - output_stream_name, output_stream_info.index), async_pipeline, nms_to_detections_element.value()); + auto pre_remove_overlapping_bboxes_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQEl_pre_bboxes_removing", + output_stream_name, output_stream_info.index), async_pipeline, 0, is_empty, interacts_with_hw, nms_to_detections_element.value()); CHECK_EXPECTED_AS_STATUS(pre_remove_overlapping_bboxes_element_queue_element); auto remove_overlapping_bboxes_element = add_remove_overlapping_bboxes_element(async_pipeline, output_stream_name, output_stream_info.index, - "RemoveOverlappingBboxesElement", iou_op_metadata, false, pre_remove_overlapping_bboxes_element_queue_element.value()); + 
"RemoveOverlappingBboxesEl", iou_op_metadata, pre_remove_overlapping_bboxes_element_queue_element.value()); CHECK_EXPECTED_AS_STATUS(remove_overlapping_bboxes_element); - auto pre_fill_nms_format_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_fill_nms_format", output_stream_name, - output_stream_info.index), async_pipeline, remove_overlapping_bboxes_element.value()); + auto pre_fill_nms_format_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQEl_pre_fill_nms_format", + output_stream_name, output_stream_info.index), async_pipeline, 0, is_empty, interacts_with_hw, remove_overlapping_bboxes_element.value()); CHECK_EXPECTED_AS_STATUS(pre_fill_nms_format_element_queue_element); auto fill_nms_format_element = add_fill_nms_format_element(async_pipeline, output_stream_name, output_stream_info.index, - "FillNmsFormatElement", iou_op_metadata, output_format.second, true, pre_fill_nms_format_element_queue_element.value()); + "FillNmsFormatEl", iou_op_metadata, pre_fill_nms_format_element_queue_element.value()); CHECK_EXPECTED_AS_STATUS(fill_nms_format_element); - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, fill_nms_format_element.value()); + auto output_vstream_info = iou_op_metadata->get_output_vstream_info(); + CHECK_EXPECTED_AS_STATUS(output_vstream_info); + const auto final_frame_size = HailoRTCommon::get_frame_size(*output_vstream_info, output_format.second); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, final_frame_size, fill_nms_format_element.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_nms_flows(std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_nms_flows(std::shared_ptr async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &op_metadata, const std::vector &vstreams_infos, const std::unordered_map &named_stream_infos) { assert(1 <= op_metadata->outputs_metadata().size()); auto updated_outputs_metadata = op_metadata->outputs_metadata(); + auto nms_metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != nms_metadata); std::pair expanded_output_format = {output_format.first, - net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(output_format.second, op_metadata->type())}; + net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(output_format.second, op_metadata->type(), + nms_metadata->nms_config().bbox_only)}; updated_outputs_metadata.begin()->second.format = expanded_output_format.second; op_metadata->set_outputs_metadata(updated_outputs_metadata); @@ -714,10 +749,19 @@ hailo_status PipelineBuilder::add_nms_flows(std::shared_ptr async { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); - auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); - CHECK_EXPECTED_AS_STATUS(op_expected); - op = op_expected.release(); - break; + if (metadata->nms_config().bbox_only) { + auto bbox_only_metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != bbox_only_metadata); + auto op_expected = net_flow::YOLOv5BboxOnlyPostProcessOp::create(bbox_only_metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } else { + auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); + 
CHECK_EXPECTED_AS_STATUS(op_expected); + op = op_expected.release(); + break; + } } case (net_flow::OperationType::YOLOV5SEG): { @@ -749,20 +793,11 @@ hailo_status PipelineBuilder::add_nms_flows(std::shared_ptr async return add_nms_flow(async_pipeline, output_streams_names, expanded_output_format, op, output_vstream_info, named_stream_infos); } -hailo_status PipelineBuilder::add_ops_flows(std::shared_ptr async_pipeline, +hailo_status AsyncPipelineBuilder::add_ops_flows(std::shared_ptr async_pipeline, const std::pair &output_format, net_flow::PostProcessOpMetadataPtr &op_metadata, const std::vector &output_streams_names, const std::vector &vstreams_infos, const std::unordered_map &named_stream_infos) { - const bool is_dma_able_hw_async = true; - for (const auto &stream_name : output_streams_names) { - auto source_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); - CHECK_EXPECTED_AS_STATUS(source_index); - // async_hw element must be filled with buffers like an edge element in order to support large batch sizes - auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *source_index); - CHECK_SUCCESS(status); - } - switch (op_metadata->type()) { case net_flow::OperationType::YOLOX: case net_flow::OperationType::YOLOV8: @@ -784,7 +819,7 @@ hailo_status PipelineBuilder::add_ops_flows(std::shared_ptr async } } -hailo_status PipelineBuilder::create_post_async_hw_elements(std::shared_ptr net_group, +hailo_status AsyncPipelineBuilder::create_post_async_hw_elements(std::shared_ptr net_group, const std::unordered_map &expanded_outputs_formats, std::unordered_map &original_outputs_formats, const std::unordered_map &named_stream_infos, std::shared_ptr async_pipeline) { @@ -852,19 +887,20 @@ hailo_status PipelineBuilder::create_post_async_hw_elements(std::shared_ptrget_async_hw_element()->fill_buffer_pool(true, async_pipeline->get_build_params().buffer_pool_size_edges, *final_elem_source_index); - CHECK_SUCCESS(status); - auto post_infer_elem = add_post_infer_element(output_format.second, first_stream_info.nms_info, async_pipeline, first_stream_info.hw_shape, - first_stream_info.format, first_stream_info.shape, stream_quant_infos, true, async_pipeline->get_async_hw_element(), final_elem_source_index.value()); + first_stream_info.format, first_stream_info.shape, stream_quant_infos, async_pipeline->get_async_hw_element(), final_elem_source_index.value()); CHECK_EXPECTED_AS_STATUS(post_infer_elem); - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_infer_elem.value()); + auto post_transform_frame_size = (HailoRTCommon::is_nms(first_stream_info.format.order)) ? 
+ HailoRTCommon::get_nms_host_frame_size(first_stream_info.nms_info, output_format.second) : + HailoRTCommon::get_frame_size(first_stream_info.shape, output_format.second); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, + post_infer_elem.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); } else { - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, async_pipeline->get_async_hw_element(), - final_elem_source_index.value()); + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, first_stream_info.hw_frame_size, + async_pipeline->get_async_hw_element(), final_elem_source_index.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); } } @@ -872,7 +908,7 @@ hailo_status PipelineBuilder::create_post_async_hw_elements(std::shared_ptr> PipelineBuilder::create_pipeline(std::shared_ptr net_group, +Expected> AsyncPipelineBuilder::create_pipeline(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, const uint32_t timeout, std::shared_ptr> pipeline_status) @@ -880,7 +916,7 @@ Expected> PipelineBuilder::create_pipeline(std::s std::unordered_map> entry_elements; std::unordered_map> last_elements; - ElementBuildParams build_params; + ElementBuildParams build_params {}; // Buffer pool sizes for pipeline elements should be: // * The minimum of the maximum queue size of all LL streams (input and output) - for edge elements @@ -921,13 +957,9 @@ Expected> PipelineBuilder::create_pipeline(std::s async_pipeline->set_build_params(build_params); - // all elements in async pipeline start as last elements, and in the end of this func all non-last-copy elements will be added buffers - bool is_last_copy_element = true; - auto async_hw_elem = AsyncHwElement::create(named_stream_infos, build_params.timeout, - build_params.buffer_pool_size_edges, build_params.elem_stats_flags, build_params.vstream_stats_flags, - build_params.shutdown_event, "AsyncHwElement", build_params.pipeline_status, net_group, - PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + build_params.elem_stats_flags, "AsyncHwEl", build_params.pipeline_status, net_group, + PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(async_hw_elem); async_pipeline->add_element_to_pipeline(async_hw_elem.value()); async_pipeline->set_async_hw_element(async_hw_elem.release()); @@ -940,10 +972,23 @@ Expected> PipelineBuilder::create_pipeline(std::s async_pipeline); CHECK_SUCCESS_AS_EXPECTED(status); + print_pipeline_elements_info(async_pipeline); + return async_pipeline; } -Expected> PipelineBuilder::create_multi_plane_splitter_element(const std::string &input_name, +void AsyncPipelineBuilder::print_pipeline_elements_info(std::shared_ptr async_pipeline) +{ + auto async_entry_elements = async_pipeline->get_entry_elements(); + std::vector visited_elements; + visited_elements.reserve(async_pipeline->get_pipeline().size()); + + for (auto &element : async_entry_elements) { + element.second->print_deep_description(visited_elements); + } +} + +Expected> AsyncPipelineBuilder::create_multi_plane_splitter_element(const std::string &input_name, hailo_format_order_t order, std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline) { CHECK_AS_EXPECTED((HAILO_FORMAT_ORDER_NV12 == order) || (HAILO_FORMAT_ORDER_NV21 == order) || (HAILO_FORMAT_ORDER_I420 == order), @@ -953,7 +998,7 @@ Expected> PipelineBuilder::create_multi_plane_ auto duration_collector_expected 
= DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); CHECK_EXPECTED(duration_collector_expected); - auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufferElement", + auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufEl", input_name, 0), std::chrono::milliseconds(HAILO_INFINITE), duration_collector_expected.release(), pipeline_status, order, async_pipeline); CHECK_EXPECTED(planes_splitter); diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp similarity index 89% rename from hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp rename to hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp index 1ddf71c9..6120fe5e 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp @@ -3,12 +3,12 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file pipeline_builder.hpp + * @file async_pipeline_builder.hpp * @brief Async Pipeline Builder **/ -#ifndef _HAILO_PIPELINE_BUILDER_HPP_ -#define _HAILO_PIPELINE_BUILDER_HPP_ +#ifndef _HAILO_ASYNC_PIPELINE_BUILDER_HPP_ +#define _HAILO_ASYNC_PIPELINE_BUILDER_HPP_ #include "hailo/hailort.h" #include "network_group/network_group_internal.hpp" @@ -20,10 +20,10 @@ namespace hailort { -class PipelineBuilder final +class AsyncPipelineBuilder final { public: - PipelineBuilder() = delete; + AsyncPipelineBuilder() = delete; static Expected> create_pipeline(std::shared_ptr net_group, const std::unordered_map &inputs_formats, @@ -74,28 +74,31 @@ class PipelineBuilder final static Expected> add_post_infer_element(const hailo_format_t &output_format, const hailo_nms_info_t &nms_info, std::shared_ptr async_pipeline, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, bool is_last_copy_element, + const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_last_async_element(std::shared_ptr async_pipeline, - const std::string &output_format_name, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + const std::string &output_format_name, size_t frame_size, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_push_queue_element(const std::string &queue_name, std::shared_ptr async_pipeline, - std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + size_t frame_size, bool is_empty, bool interacts_with_hw, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0, + bool is_entry = false); static Expected> add_nms_to_detections_convert_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_remove_overlapping_bboxes_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - 
const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_fill_nms_format_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const hailo_format_t &output_format, const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> create_multi_plane_splitter_element(const std::string &input_name, hailo_format_order_t order, std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline); static Expected should_transform(const hailo_stream_info_t &stream_info, const std::vector &stream_quant_infos, const hailo_format_t &output_format); + + static void print_pipeline_elements_info(std::shared_ptr async_pipeline); }; } /* namespace hailort */ -#endif /* _HAILO_PIPELINE_BUILDER_HPP_ */ +#endif /* _HAILO_ASYNC_PIPELINE_BUILDER_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp new file mode 100644 index 00000000..f0b6e2b5 --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp @@ -0,0 +1,437 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file edge_elements.cpp + * @brief Implementation of the edge elements (sinks and sources) + **/ + +#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/edge_elements.hpp" + +namespace hailort +{ + +PipelinePad &SinkElement::sink() +{ + return m_sinks[0]; +} + +std::vector SinkElement::execution_pads() +{ + std::vector result{&sink()}; + return result; +} + +hailo_status SinkElement::execute_terminate(hailo_status /*error_status*/) +{ + return HAILO_SUCCESS; +} + +hailo_status SinkElement::execute_dequeue_user_buffers(hailo_status /*error_status*/) +{ + return HAILO_SUCCESS; +} + +Expected> HwWriteElement::create(std::shared_ptr stream, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto got_flush_event = Event::create_shared(Event::State::not_signalled); + CHECK_EXPECTED(got_flush_event); + + // On HwWriteElement the stream always owns the buffer, hence, we set the mode explicitly. 
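+    // (Presumed semantics, not stated in this patch: in OWNING mode the stream allocates and owns
+    // its transfer buffers, so run_push() below can copy from the caller's MemoryView into the
+    // stream. The async flow keeps user-owned buffers instead (see LastAsyncElement), which is
+    // why the mode is pinned explicitly here rather than inferred.)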
+    auto status = stream->set_buffer_mode(StreamBufferMode::OWNING);
+    CHECK_SUCCESS_AS_EXPECTED(status);
+
+    auto hw_write_elem_ptr = make_shared_nothrow<HwWriteElement>(stream, name,
+        duration_collector.release(), std::move(pipeline_status), got_flush_event.release(), pipeline_direction);
+    CHECK_AS_EXPECTED(nullptr != hw_write_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    LOGGER__INFO("Created {}", hw_write_elem_ptr->description());
+
+    return hw_write_elem_ptr;
+}
+
+HwWriteElement::HwWriteElement(std::shared_ptr<InputStreamBase> stream, const std::string &name, DurationCollector &&duration_collector,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction) :
+    SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr),
+    m_stream(stream), m_got_flush_event(got_flush_event)
+{}
+
+Expected<PipelineBuffer> HwWriteElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+{
+    return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
+hailo_status HwWriteElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
+{
+    if (PipelineBuffer::Type::FLUSH == buffer.get_type()) {
+        hailo_status flush_status = m_stream->flush();
+        if (HAILO_STREAM_ABORT == flush_status) {
+            LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
+        } else if (HAILO_SUCCESS != flush_status) {
+            LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
+        }
+        hailo_status status = m_got_flush_event->signal();
+        CHECK_SUCCESS(status);
+        return HAILO_SUCCESS;
+    }
+
+    m_duration_collector.start_measurement();
+    const auto status = m_stream->write(MemoryView(buffer.data(), buffer.size()));
+    m_duration_collector.complete_measurement();
+
+    if (HAILO_STREAM_ABORT == status) {
+        LOGGER__INFO("Failed to send on input stream {} because stream was aborted", m_stream->to_string());
+        return HAILO_STREAM_ABORT;
+    }
+    CHECK_SUCCESS(status, "{} (H2D) failed with status={}", name(), status);
+
+    return HAILO_SUCCESS;
+}
+
+void HwWriteElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
+{
+    LOGGER__ERROR("run_push_async is not supported for {}", name());
+    assert(false);
+}
+
+hailo_status HwWriteElement::execute_activate()
+{
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_deactivate()
+{
+    // The flush operation will block until all buffers currently in the pipeline are processed.
+    // We assume that no buffers are sent after the call to deactivate.
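+    // Concretely: a flush() that returns HAILO_STREAM_ABORT or HAILO_STREAM_NOT_ACTIVATED is
+    // treated as a benign race with shutdown and short-circuits with success; any other failure
+    // is logged, and we still fall through to execute_abort() so that pending writers are released.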
+    hailo_status flush_status = m_stream->flush();
+    if (HAILO_STREAM_ABORT == flush_status) {
+        LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
+        return HAILO_SUCCESS;
+    } else if (HAILO_STREAM_NOT_ACTIVATED == flush_status) {
+        LOGGER__INFO("Failed flushing input stream {} because stream is not activated", m_stream->to_string());
+        return HAILO_SUCCESS;
+    } else if (HAILO_SUCCESS != flush_status) {
+        LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
+    }
+
+    auto abort_status = execute_abort();
+    CHECK(((abort_status == HAILO_SUCCESS) || (abort_status == HAILO_STREAM_NOT_ACTIVATED)), abort_status,
+        "Failed to abort stream in {}", name());
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_post_deactivate(bool should_clear_abort)
+{
+    if (should_clear_abort) {
+        auto status = execute_clear_abort();
+        CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
+            "Failed to clear abort stream in {}", name());
+    }
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_clear()
+{
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_flush()
+{
+    hailo_status status = m_got_flush_event->wait(m_stream->get_timeout());
+    CHECK_SUCCESS(status);
+
+    status = m_got_flush_event->reset();
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_abort()
+{
+    return m_stream->abort_impl();
+}
+
+hailo_status HwWriteElement::execute_clear_abort()
+{
+    return m_stream->clear_abort_impl();
+}
+
+std::string HwWriteElement::description() const
+{
+    std::stringstream element_description;
+    element_description << "(" << this->name() << " | hw_frame_size: " << m_stream->get_info().hw_frame_size << ")";
+
+    return element_description.str();
+}
+
+Expected<std::shared_ptr<LastAsyncElement>> LastAsyncElement::create(const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
+    hailo_vstream_stats_flags_t vstream_stats_flags, std::shared_ptr<std::atomic<hailo_status>> pipeline_status, size_t queue_size,
+    size_t frame_size, EventPtr shutdown_event, std::shared_ptr<AsyncPipeline> async_pipeline)
+{
+    auto duration_collector = DurationCollector::create(elem_flags);
+    CHECK_EXPECTED(duration_collector);
+
+    auto is_empty = true; // LastAsync always holds user buffers, therefore it's created empty
+    auto is_dma_able = false;
+    queue_size = queue_size * 2; // Multiplying by 2 to ensure dual-buffering when the edge element is the bottleneck
+    auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, elem_flags, vstream_stats_flags, is_empty, is_dma_able);
+    CHECK_EXPECTED(buffer_pool);
+
+    auto last_async_elem_ptr = make_shared_nothrow<LastAsyncElement>(name,
+        duration_collector.release(), std::move(pipeline_status), buffer_pool.release(), async_pipeline);
+    CHECK_NOT_NULL_AS_EXPECTED(last_async_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    LOGGER__INFO("Created {}", last_async_elem_ptr->description());
+
+    return last_async_elem_ptr;
+}
+
+Expected<std::shared_ptr<LastAsyncElement>> LastAsyncElement::create(const std::string &name,
+    const ElementBuildParams &build_params, size_t frame_size, std::shared_ptr<AsyncPipeline> async_pipeline)
+{
+    return LastAsyncElement::create(name, build_params.elem_stats_flags, build_params.vstream_stats_flags, build_params.pipeline_status,
+        build_params.buffer_pool_size_edges, frame_size, build_params.shutdown_event, async_pipeline);
+}
+
+LastAsyncElement::LastAsyncElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    BufferPoolPtr buffer_pool, std::shared_ptr<AsyncPipeline>
async_pipeline) :
+    SinkElement(name, std::move(duration_collector), std::move(pipeline_status), PipelineDirection::PUSH, async_pipeline),
+    m_pool(buffer_pool)
+{}
+
+Expected<PipelineBuffer> LastAsyncElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+{
+    return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
+hailo_status LastAsyncElement::run_push(PipelineBuffer &&/*optional*/, const PipelinePad &/*sink*/)
+{
+    return HAILO_INVALID_OPERATION;
+}
+
+void LastAsyncElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
+{
+    // Call the callback here, on the LastAsyncElement, because if we wait for the destructor to
+    // call the callbacks, they can be called out of order
+    buffer.call_exec_done();
+}
+
+hailo_status LastAsyncElement::execute_activate()
+{
+    return HAILO_SUCCESS;
+}
+
+hailo_status LastAsyncElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done)
+{
+    return m_pool->enqueue_buffer(mem_view, exec_done);
+}
+
+Expected<bool> LastAsyncElement::can_push_buffer_upstream()
+{
+    return !m_pool->is_full();
+}
+
+SourceElement::SourceElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline) :
+    PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline)
+{
+    m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE);
+}
+
+hailo_status LastAsyncElement::execute_dequeue_user_buffers(hailo_status error_status)
+{
+    auto empty_pool_status = empty_buffer_pool(m_pool, error_status, BUFFER_POOL_DEFAULT_QUEUE_TIMEOUT);
+    CHECK_SUCCESS(empty_pool_status);
+
+    return HAILO_SUCCESS;
+}
+
+PipelinePad &SourceElement::source()
+{
+    return m_sources[0];
+}
+
+std::vector<PipelinePad*> SourceElement::execution_pads()
+{
+    std::vector<PipelinePad*> result{&source()};
+    return result;
+}
+
+SinkElement::SinkElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline) :
+    PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline)
+{
+    m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK);
+}
+
+Expected<std::shared_ptr<HwReadElement>> HwReadElement::create(std::shared_ptr<OutputStreamBase> stream, const std::string &name,
+    const ElementBuildParams &build_params, PipelineDirection pipeline_direction)
+{
+    // On HwReadElement the stream always owns the buffer, hence we set the mode explicitly.
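+    // Mirrors HwWriteElement on the D2H side: with an OWNING stream, run_pull() can take an
+    // empty buffer from the downstream element's pool and read() directly into it (see
+    // run_pull() below). A minimal creation sketch (variable names assumed, not part of this patch):
+    //     auto hw_read_elem = HwReadElement::create(output_stream, "HwReadEl", build_params);
+    //     CHECK_EXPECTED(hw_read_elem);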
+ auto status = stream->set_buffer_mode(StreamBufferMode::OWNING); + CHECK_SUCCESS_AS_EXPECTED(status); + + auto duration_collector = DurationCollector::create(build_params.elem_stats_flags); + CHECK_EXPECTED(duration_collector); + + auto pipeline_status = build_params.pipeline_status; + + auto shutdown_event = Event::create_shared(Event::State::not_signalled); + CHECK_EXPECTED(shutdown_event); + + auto hw_read_elem_ptr = make_shared_nothrow(stream, name, build_params.timeout, + duration_collector.release(), shutdown_event.release(), std::move(pipeline_status), pipeline_direction); + CHECK_AS_EXPECTED(nullptr != hw_read_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", hw_read_elem_ptr->description()); + + return hw_read_elem_ptr; +} + +HwReadElement::HwReadElement(std::shared_ptr stream, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + EventPtr shutdown_event, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction) : + SourceElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr), + m_stream(stream), + m_timeout(timeout), + m_shutdown_event(shutdown_event), + m_activation_wait_or_shutdown(stream->get_core_op_activated_event(), shutdown_event) +{} + +uint32_t HwReadElement::get_invalid_frames_count() +{ + return m_stream->get_invalid_frames_count(); +} + +std::string HwReadElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | hw_frame_size: " << m_stream->get_info().hw_frame_size << ")"; + + return element_description.str(); +} + +hailo_status HwReadElement::execute_post_deactivate(bool should_clear_abort) +{ + if (should_clear_abort) { + auto status = execute_clear_abort(); + CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED == status)), status, + "Failed to clear abort stream in {}", name()); + } + return HAILO_SUCCESS; +} + +hailo_status HwReadElement::execute_clear() +{ + return HAILO_SUCCESS; +} + +hailo_status HwReadElement::execute_flush() +{ + return HAILO_INVALID_OPERATION; +} + +hailo_status HwReadElement::execute_abort() +{ + return m_stream->abort_impl(); +} + +hailo_status HwReadElement::execute_clear_abort() +{ + return m_stream->clear_abort_impl(); +} + +void HwReadElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + LOGGER__ERROR("run_push_async is not supported for {}", name()); + assert(false); +} + +hailo_status HwReadElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +Expected HwReadElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + while (true) { + if (!m_stream->is_scheduled()) { + auto status = m_activation_wait_or_shutdown.wait(m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + if (HAILO_TIMEOUT == status) { + return make_unexpected(HAILO_NETWORK_GROUP_NOT_ACTIVATED); + } + CHECK_SUCCESS_AS_EXPECTED(status); + } else { + auto status = m_activation_wait_or_shutdown.wait(std::chrono::milliseconds(0)); + 
if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + } + + MemoryView buffer_view(buffer.value().as_view()); + m_duration_collector.start_measurement(); + auto status = m_stream->read(buffer_view); + if (HAILO_INVALID_FRAME == status) { + m_stream->increase_invalid_frames_count(1); + status = HAILO_SUCCESS; + } + if (HAILO_STREAM_NOT_ACTIVATED == status) { + // Try again + continue; + } + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("Reading from stream was aborted!"); + return make_unexpected(HAILO_STREAM_ABORT); + } + CHECK_SUCCESS_AS_EXPECTED(status, "{} (D2H) failed with status={}", name(), status); + m_duration_collector.complete_measurement(); + + return buffer.release(); + } +} + +hailo_status HwReadElement::execute_activate() +{ + CHECK_SUCCESS(m_shutdown_event->reset(), "Failed to reset shutdown event for {}", name()); + + return HAILO_SUCCESS; +} + +hailo_status HwReadElement::execute_deactivate() +{ + auto signal_shutdown_status = m_shutdown_event->signal(); + if (HAILO_SUCCESS != signal_shutdown_status) { + LOGGER__ERROR("Signaling {} shutdown event failed with {}", name(), signal_shutdown_status); + } + + auto abort_status = execute_abort(); + if ((HAILO_SUCCESS != abort_status) && (HAILO_STREAM_NOT_ACTIVATED != abort_status)) { + LOGGER__ERROR("Abort {} failed with {}", name(), abort_status); + return abort_status; + } + + return signal_shutdown_status; +} + + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp new file mode 100644 index 00000000..090a7cad --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp @@ -0,0 +1,143 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file edge_elements.hpp + * @brief all edge elements in the pipeline (sinks and sources) + **/ + +#ifndef _HAILO_EDGE_ELEMENTS_HPP_ +#define _HAILO_EDGE_ELEMENTS_HPP_ + +namespace hailort +{ +// An element with one sink pad only (consumes data) +class SinkElement : public PipelineElementInternal +{ +public: + SinkElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + PipelinePad &sink(); + +protected: + virtual std::vector execution_pads() override; + virtual hailo_status execute_terminate(hailo_status error_status) override; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; +}; + +class HwWriteElement : public SinkElement +{ +public: + static Expected> create(std::shared_ptr stream, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PUSH); + HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction); + virtual ~HwWriteElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual hailo_status execute_activate() override; + virtual hailo_status execute_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_clear() override; + virtual hailo_status execute_flush() override; + virtual hailo_status execute_abort() override; + virtual hailo_status execute_clear_abort() override; + virtual std::string description() const override; + +private: + std::shared_ptr m_stream; + EventPtr m_got_flush_event; +}; + +class LastAsyncElement : public SinkElement +{ +public: + static Expected> create(const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_stats_flags, + std::shared_ptr> pipeline_status, size_t queue_size, size_t frame_size, + EventPtr shutdown_event, std::shared_ptr async_pipeline); + static Expected> create(const std::string &name, const ElementBuildParams &build_params, + size_t frame_size, std::shared_ptr async_pipeline); + LastAsyncElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + BufferPoolPtr buffer_pool, std::shared_ptr async_pipeline); + virtual ~LastAsyncElement() = default; + + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual hailo_status execute_activate() override; + + virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) override; + + virtual Expected can_push_buffer_upstream() override; + + virtual hailo_status execute_post_deactivate(bool /*should_clear_abort*/) override { return HAILO_SUCCESS; }; + virtual hailo_status execute_deactivate() 
override { return HAILO_SUCCESS; }; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; + + virtual BufferPoolPtr get_buffer_pool() const override + { + return m_pool; + } + +private: + BufferPoolPtr m_pool; +}; + +// An element with one source pad only (generates data) +class SourceElement : public PipelineElementInternal +{ +public: + SourceElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + PipelinePad &source(); + +protected: + virtual std::vector execution_pads() override; +}; + +class HwReadElement : public SourceElement +{ +public: + static Expected> create(std::shared_ptr stream, const std::string &name, + const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL); + HwReadElement(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, EventPtr shutdown_event, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction); + virtual ~HwReadElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual hailo_status execute_activate() override; + virtual hailo_status execute_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_clear() override; + virtual hailo_status execute_flush() override; + virtual hailo_status execute_abort() override; + virtual hailo_status execute_clear_abort() override; + uint32_t get_invalid_frames_count(); + virtual std::string description() const override; + + PipelinePad &next_pad_downstream() + { + return *m_sources[0].next(); + } + +private: + std::shared_ptr m_stream; + std::chrono::milliseconds m_timeout; + EventPtr m_shutdown_event; + WaitOrShutdown m_activation_wait_or_shutdown; +}; + + +} /* namespace hailort */ + +#endif /* _HAILO_EDGE_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp new file mode 100644 index 00000000..03cdc47e --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp @@ -0,0 +1,771 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file filter_elements.cpp + * @brief Implementation of the filter elements + **/ + +#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/filter_elements.hpp" + +namespace hailort +{ + +FilterElement::FilterElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, + std::chrono::milliseconds timeout, std::shared_ptr async_pipeline) : + IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout) +{} + +hailo_status FilterElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + auto output = action(std::move(buffer), PipelineBuffer()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + return output.status(); + } + CHECK_EXPECTED_AS_STATUS(output); + + hailo_status status = next_pad().run_push(output.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("run_push of {} was shutdown!", name()); + return status; + } + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +void FilterElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + assert(m_pipeline_direction == PipelineDirection::PUSH); + if (HAILO_SUCCESS != buffer.action_status()) { + auto pool = next_pad().element().get_buffer_pool(); + assert(pool); + + auto buffer_from_pool = pool->get_available_buffer(PipelineBuffer(), m_timeout); + if (HAILO_SUCCESS != buffer_from_pool.status()) { + handle_non_recoverable_async_error(buffer_from_pool.status()); + } else { + buffer_from_pool->set_action_status(buffer.action_status()); + next_pad().run_push_async(buffer_from_pool.release()); + } + return; + } + + auto output = action(std::move(buffer), PipelineBuffer()); + if (HAILO_SUCCESS == output.status()) { + next_pad().run_push_async(output.release()); + } else { + next_pad().run_push_async(PipelineBuffer(output.status())); + } + return; +} + +Expected FilterElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + auto buffer = next_pad().run_pull(); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + LOGGER__INFO("run_pull in FilterElement was shutdown!"); + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer); + return action(buffer.release(), std::move(optional)); +} + +PipelinePad &FilterElement::next_pad_downstream() +{ + return *m_sources[0].next(); +} + +PipelinePad &FilterElement::next_pad_upstream() +{ + return *m_sinks[0].prev(); +} + +Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto transform_context = InputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, + dst_quant_infos); + CHECK_EXPECTED(transform_context, "Failed Creating InputTransformContext"); + + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto 
pre_infer_elem_ptr = make_shared_nothrow(transform_context.release(), + name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, + async_pipeline); + CHECK_AS_EXPECTED(nullptr != pre_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", pre_infer_elem_ptr->description()); + + return pre_infer_elem_ptr; +} + +Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, + std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.pipeline_elements_stats_flags, + pipeline_status, pipeline_direction, async_pipeline); +} + +Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, + build_params.timeout, build_params.elem_stats_flags, build_params.pipeline_status, pipeline_direction, async_pipeline); +} + +PreInferElement::PreInferElement(std::unique_ptr &&transform_context, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_transform_context(std::move(transform_context)) +{} + +Expected PreInferElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) +{ + LOGGER__ERROR("PreInferElement does not support run_pull operation"); + return make_unexpected(HAILO_INVALID_OPERATION); +} + +PipelinePad &PreInferElement::next_pad() +{ + // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed) + return *m_sources[0].next(); +} + +std::string PreInferElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_transform_context->description() << ")"; + return element_description.str(); +} + +Expected PreInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + if (PipelineBuffer::Type::FLUSH == input.get_type()) { + return std::move(input); + } + + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto transformed_buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == transformed_buffer.status()) { + return make_unexpected(transformed_buffer.status()); + } + + if (!transformed_buffer) { + input.set_action_status(transformed_buffer.status()); + } + CHECK_AS_EXPECTED(HAILO_TIMEOUT != transformed_buffer.status(), HAILO_TIMEOUT, + "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); + CHECK_EXPECTED(transformed_buffer); + + auto dst = transformed_buffer->as_view(); + m_duration_collector.start_measurement(); + const auto status = m_transform_context->transform(input.as_view(), dst); + m_duration_collector.complete_measurement(); + + input.set_action_status(status); + transformed_buffer->set_action_status(status); + + auto metadata = input.get_metadata(); + + CHECK_SUCCESS_AS_EXPECTED(status); + + // Note: The latency to be measured starts as the input buffer is sent to the InputVStream (via write()) + transformed_buffer->set_metadata(std::move(metadata)); + + return transformed_buffer.release(); +} + +Expected> ConvertNmsToDetectionsElement::create( + const hailo_nms_info_t &nms_info, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto convert_nms_to_detections_elem_ptr = make_shared_nothrow(std::move(nms_info), + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != convert_nms_to_detections_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", convert_nms_to_detections_elem_ptr->description()); + + return convert_nms_to_detections_elem_ptr; +} + +Expected> ConvertNmsToDetectionsElement::create( + const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return ConvertNmsToDetectionsElement::create(nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status, + build_params.timeout, pipeline_direction, async_pipeline); +} + +ConvertNmsToDetectionsElement::ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_nms_info(std::move(nms_info)) +{} + +hailo_status ConvertNmsToDetectionsElement::run_push(PipelineBuffer &&buffer, const PipelinePad 
&sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "ConvertNmsToDetectionsElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &ConvertNmsToDetectionsElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +Expected ConvertNmsToDetectionsElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + buffer->set_metadata(input.get_metadata()); + + m_duration_collector.start_measurement(); + + auto detections_pair = net_flow::NmsPostProcessOp::transform__d2h_NMS_DETECTIONS(input.data(), m_nms_info); + auto detections_pipeline_data = make_shared_nothrow + (std::move(detections_pair.first),std::move(detections_pair.second)); + buffer->set_additional_data(detections_pipeline_data); + + m_duration_collector.complete_measurement(); + + return buffer.release(); +} + +Expected> FillNmsFormatElement::create(const net_flow::NmsPostProcessConfig nms_config, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto fill_nms_format_element = make_shared_nothrow(std::move(nms_config), + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != fill_nms_format_element, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", fill_nms_format_element->description()); + + return fill_nms_format_element; +} + +Expected> FillNmsFormatElement::create(const net_flow::NmsPostProcessConfig nms_config, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return FillNmsFormatElement::create(nms_config, name, build_params.elem_stats_flags, + build_params.pipeline_status, build_params.timeout, pipeline_direction, async_pipeline); +} + +FillNmsFormatElement::FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_nms_config(std::move(nms_config)) +{} + +hailo_status FillNmsFormatElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "FillNmsFormatElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &FillNmsFormatElement::next_pad() +{ + if 
(PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +Expected FillNmsFormatElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer_expected = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer_expected.status()) { + return make_unexpected(buffer_expected.status()); + } + if (!buffer_expected) { + input.set_action_status(buffer_expected.status()); + } + CHECK_EXPECTED(buffer_expected, "{} (D2H) failed with status={}", name(), buffer_expected.status()); + auto buffer = buffer_expected.release(); + + buffer.set_metadata(input.get_metadata()); + + m_duration_collector.start_measurement(); + + auto detections = input.get_metadata().get_additional_data(); + auto dst = buffer.as_view(); + net_flow::NmsPostProcessOp::fill_nms_format_buffer(dst, detections->m_detections, detections->m_detections_classes_count, + m_nms_config); + + m_duration_collector.complete_measurement(); + + return buffer; +} + +Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto transform_context = OutputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, + dst_quant_infos, nms_info); + CHECK_EXPECTED(transform_context, "Failed Creating OutputTransformContext"); + + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto post_infer_elem_ptr = make_shared_nothrow(transform_context.release(), name, + duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != post_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", post_infer_elem_ptr->description()); + + return post_infer_elem_ptr; +} + +Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, nms_info, + name, vstream_params.pipeline_elements_stats_flags, pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms), + pipeline_direction, async_pipeline); +} + +Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, + const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + 
return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, + dst_quant_infos, nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status, + build_params.timeout, pipeline_direction, async_pipeline); +} + +PostInferElement::PostInferElement(std::unique_ptr &&transform_context, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_transform_context(std::move(transform_context)) +{} + +Expected PostInferElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "PostInferElement {} does not support run_pull operation", name() + ); + return FilterElement::run_pull(std::move(optional), source); +} + +hailo_status PostInferElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "PostInferElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &PostInferElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +std::string PostInferElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_transform_context->description() << ")"; + return element_description.str(); +} + +Expected PostInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + // Note: The latency to be measured starts as the buffer is read from the HW (it's 'input' in this case) + buffer->set_metadata(input.get_metadata()); + + auto dst = buffer->as_view(); + m_duration_collector.start_measurement(); + const auto status = m_transform_context->transform(input.as_view(), dst); + m_duration_collector.complete_measurement(); + + input.set_action_status(status); + buffer->set_action_status(status); + + CHECK_SUCCESS_AS_EXPECTED(status); + + return buffer.release(); +} + +Expected> RemoveOverlappingBboxesElement::create( + const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto convert_nms_removed_overlapping_elem_ptr = make_shared_nothrow(std::move(nms_config), + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != convert_nms_removed_overlapping_elem_ptr, 
HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", convert_nms_removed_overlapping_elem_ptr->description()); + + return convert_nms_removed_overlapping_elem_ptr; +} + +Expected> RemoveOverlappingBboxesElement::create(const net_flow::NmsPostProcessConfig nms_config, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return RemoveOverlappingBboxesElement::create(nms_config, name, + build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout, pipeline_direction, async_pipeline); +} + +RemoveOverlappingBboxesElement::RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_nms_config(std::move(nms_config)) +{} + +hailo_status RemoveOverlappingBboxesElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "RemoveOverlappingBboxesElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &RemoveOverlappingBboxesElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +std::string RemoveOverlappingBboxesElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name(); + element_description << " | " << "IoU Threshold: " << this->m_nms_config.nms_iou_th << ")"; + return element_description.str(); +} + +Expected RemoveOverlappingBboxesElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + buffer->set_metadata(input.get_metadata()); + + m_duration_collector.start_measurement(); + auto detections_pipeline_data = input.get_metadata().get_additional_data(); + + net_flow::NmsPostProcessOp::remove_overlapping_boxes(detections_pipeline_data->m_detections, + detections_pipeline_data->m_detections_classes_count, m_nms_config.nms_iou_th); + m_duration_collector.complete_measurement(); + + return buffer.release(); +} + +Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + auto argmax_elem_ptr = make_shared_nothrow(argmax_op, + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != argmax_elem_ptr, 
HAILO_OUT_OF_HOST_MEMORY); + LOGGER__INFO("Created {}", argmax_elem_ptr->description()); + return argmax_elem_ptr; +} + +Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return ArgmaxPostProcessElement::create(argmax_op, name, + build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout, + pipeline_direction, async_pipeline); +} + +ArgmaxPostProcessElement::ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_argmax_op(argmax_op) +{} + +Expected ArgmaxPostProcessElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "ArgmaxPostProcessElement {} does not support run_pull operation", name()); + return FilterElement::run_pull(std::move(optional), source); +} + +hailo_status ArgmaxPostProcessElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "ArgmaxPostProcessElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &ArgmaxPostProcessElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +std::string ArgmaxPostProcessElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_argmax_op->metadata()->get_op_description() << ")"; + return element_description.str(); +} + +Expected ArgmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + std::map inputs; + std::map outputs; + auto &input_name = m_argmax_op->inputs_metadata().begin()->first; + auto &output_name = m_argmax_op->outputs_metadata().begin()->first; + inputs.insert({input_name, input.as_view()}); + outputs.insert({output_name, buffer->as_view()}); + m_duration_collector.start_measurement(); + auto post_process_result = m_argmax_op->execute(inputs, outputs); + m_duration_collector.complete_measurement(); + + input.set_action_status(post_process_result); + buffer->set_action_status(post_process_result); + + CHECK_SUCCESS_AS_EXPECTED(post_process_result); + + return buffer.release(); +} + +Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr 
async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + auto softmax_elem_ptr = make_shared_nothrow(softmax_op, + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != softmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + LOGGER__INFO("Created {}", softmax_elem_ptr->description()); + return softmax_elem_ptr; +} + +Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return SoftmaxPostProcessElement::create(softmax_op, name, build_params.elem_stats_flags, build_params.pipeline_status, + build_params.timeout, pipeline_direction, async_pipeline); +} + +SoftmaxPostProcessElement::SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_softmax_op(softmax_op) +{} + +Expected SoftmaxPostProcessElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "SoftmaxPostProcessElement {} does not support run_pull operation", name()); + return FilterElement::run_pull(std::move(optional), source); +} + +hailo_status SoftmaxPostProcessElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "SoftmaxPostProcessElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &SoftmaxPostProcessElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +std::string SoftmaxPostProcessElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_softmax_op->metadata()->get_op_description() << ")"; + return element_description.str(); +} + +Expected SoftmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + std::map inputs; + std::map outputs; + auto &input_name = m_softmax_op->inputs_metadata().begin()->first; + auto &output_name = m_softmax_op->outputs_metadata().begin()->first; + inputs.insert({input_name, input.as_view()}); + outputs.insert({output_name, buffer->as_view()}); + m_duration_collector.start_measurement(); + auto post_process_result = m_softmax_op->execute(inputs, outputs); + m_duration_collector.complete_measurement(); + + input.set_action_status(post_process_result); + buffer->set_action_status(post_process_result); + + 
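+    // Stamping the post-process status on both the input and the output buffer lets
+    // completion callbacks on either side of this element observe a failed softmax run
+    // instead of a silently dropped frame.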
CHECK_SUCCESS_AS_EXPECTED(post_process_result); + + return buffer.release(); +} + +Expected> CopyBufferElement::create(const std::string &name, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector); + auto elem_ptr = make_shared_nothrow(name, duration_collector.release(), std::move(pipeline_status), + timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", elem_ptr->description()); + + return elem_ptr; +} + +CopyBufferElement::CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline) +{} + +PipelinePad &CopyBufferElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +Expected CopyBufferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be passed to CopyBufferElement!"); + + CHECK_AS_EXPECTED(optional.size() == input.size(), HAILO_INVALID_ARGUMENT, "Optional buffer size does not equal the input buffer size!"); + memcpy(optional.data(), input.data(), optional.size()); + + return std::move(optional); +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp new file mode 100644 index 00000000..e5f5abfe --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp @@ -0,0 +1,268 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file filter_elements.hpp + * @brief All filter elements in the pipeline (single input, single output).
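+ *        Each filter element runs a single action() per frame: it acquires its output
+ *        buffer from the pool of the next element downstream, transforms the input into
+ *        it, and forwards the result.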
+ **/ + +#ifndef _HAILO_FILTER_ELEMENTS_HPP_ +#define _HAILO_FILTER_ELEMENTS_HPP_ + +#include "net_flow/pipeline/pipeline_internal.hpp" + +namespace hailort +{ + +class FilterElement : public IntermediateElement +{ +public: + FilterElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::chrono::milliseconds timeout, + std::shared_ptr async_pipeline); + virtual ~FilterElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + +protected: + // The optional buffer functions as an output buffer that the user can write to instead of acquiring a new buffer + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) = 0; + + PipelinePad &next_pad_downstream(); + PipelinePad &next_pad_upstream(); + + std::chrono::milliseconds m_timeout; +}; + +class PreInferElement : public FilterElement +{ +public: + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PUSH, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PUSH, std::shared_ptr async_pipeline = nullptr); + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PUSH, + std::shared_ptr async_pipeline = nullptr); + PreInferElement(std::unique_ptr &&transform_context, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); + virtual ~PreInferElement() = default; + + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::unique_ptr m_transform_context; +}; + +class RemoveOverlappingBboxesElement : public FilterElement +{ +public: + static Expected> create( + const net_flow::NmsPostProcessConfig nms_config, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static 
Expected> create(const net_flow::NmsPostProcessConfig nms_config, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~RemoveOverlappingBboxesElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + + virtual hailo_status set_nms_iou_threshold(float32_t threshold) + { + m_nms_config.nms_iou_th = threshold; + return HAILO_SUCCESS; + } + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + net_flow::NmsPostProcessConfig m_nms_config; +}; + +class PostInferElement : public FilterElement +{ +public: + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_info, const hailo_nms_info_t &nms_info, + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, + const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + PostInferElement(std::unique_ptr &&transform_context, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~PostInferElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::unique_ptr m_transform_context; +}; + +class ConvertNmsToDetectionsElement : public FilterElement +{ +public: + static Expected> create(const hailo_nms_info_t &nms_info, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> 
pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create( + const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params, + PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); + ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~ConvertNmsToDetectionsElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual PipelinePad &next_pad() override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + hailo_nms_info_t m_nms_info; +}; + +class FillNmsFormatElement : public FilterElement +{ +public: + static Expected> create(const net_flow::NmsPostProcessConfig nms_config, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const net_flow::NmsPostProcessConfig nms_config, const std::string &name, + const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~FillNmsFormatElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual PipelinePad &next_pad() override; + + virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override + { + m_nms_config.max_proposals_per_class = max_proposals_per_class; + return HAILO_SUCCESS; + } + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + net_flow::NmsPostProcessConfig m_nms_config; +}; + +class ArgmaxPostProcessElement : public FilterElement +{ +public: + static Expected> create(std::shared_ptr argmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); + static Expected> create(std::shared_ptr argmax_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); + virtual ~ArgmaxPostProcessElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, 
const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::shared_ptr m_argmax_op; +}; + +class SoftmaxPostProcessElement : public FilterElement +{ +public: + static Expected> create(std::shared_ptr softmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(std::shared_ptr softmax_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); + virtual ~SoftmaxPostProcessElement() = default; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::shared_ptr m_softmax_op; +}; + +class CopyBufferElement : public FilterElement +{ +public: + static Expected> create(const std::string &name, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); + CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~CopyBufferElement() = default; + virtual PipelinePad &next_pad() override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; +}; + + + +} /* namespace hailort */ + +#endif /* _HAILO_FILTER_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp index 8f0d8b26..fa19401a 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp @@ -13,9 +13,12 @@ #include "hailo/hailort_common.hpp" #include "hailo/vdevice.hpp" #include "hailo/infer_model.hpp" +#include "vdevice/vdevice_internal.hpp" +#include "hef/hef_internal.hpp" #include "net_flow/pipeline/infer_model_internal.hpp" #include "net_flow/pipeline/async_infer_runner.hpp" + #define WAIT_FOR_ASYNC_IN_DTOR_TIMEOUT (std::chrono::milliseconds(10000)) namespace hailort @@ -86,6 +89,12 @@ void InferModel::InferStream::Impl::set_nms_max_proposals_per_class(uint32_t max m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; } +void InferModel::InferStream::Impl::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) +{ + m_nms_max_accumulated_mask_size = max_accumulated_mask_size; + m_vstream_info.nms_shape.max_accumulated_mask_size = max_accumulated_mask_size; +} + 
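The setter pair above follows the same pattern as the existing NMS knobs: the value is cached on the InferStream::Impl and mirrored into the vstream info, and configure() later pushes it down to the network group. A rough caller-side sketch of the intended flow; VDevice::create(), create_infer_model() and the single-output output() accessor are assumed from the public API, and the HEF path and mask budget are placeholders:

// Hypothetical usage sketch - not part of this change.
#include "hailo/hailort.hpp"
using namespace hailort;

hailo_status configure_with_mask_budget()
{
    auto vdevice = VDevice::create();
    if (!vdevice) { return vdevice.status(); }

    // Placeholder HEF with a single NMS-with-byte-mask output (e.g. an instance-segmentation model)
    auto model = vdevice.value()->create_infer_model("yolov5_seg.hef");
    if (!model) { return model.status(); }

    auto output = model.value()->output();
    if (!output) { return output.status(); }

    // Must be called before configure(); configure() is what forwards the value
    // to the network group (set_nms_max_accumulated_mask_size(edge_name, ...)).
    output.value().set_nms_max_accumulated_mask_size(2 * 1024 * 1024);

    auto configured = model.value()->configure();
    return configured.status();
}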
InferModel::InferStream::InferStream(std::shared_ptr pimpl) : m_pimpl(pimpl) { } @@ -150,6 +159,11 @@ void InferModel::InferStream::set_nms_max_proposals_per_class(uint32_t max_propo m_pimpl->set_nms_max_proposals_per_class(max_proposals_per_class); } +void InferModel::InferStream::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) +{ + m_pimpl->set_nms_max_accumulated_mask_size(max_accumulated_mask_size); +} + InferModel::InferModel(VDevice &vdevice, Hef &&hef, std::unordered_map &&inputs, std::unordered_map &&outputs) : m_vdevice(vdevice), m_hef(std::move(hef)), m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), @@ -203,11 +217,8 @@ void InferModel::set_hw_latency_measurement_flags(hailo_latency_measurement_flag m_config_params.latency = latency; } -// TODO: document that this will check validity of format tpyes/orders -Expected InferModel::configure(const std::string &network_name) +Expected InferModel::configure() { - CHECK_AS_EXPECTED(network_name.empty(), HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!"); - auto configure_params = m_vdevice.get().create_configure_params(m_hef); CHECK_EXPECTED(configure_params); @@ -227,6 +238,15 @@ Expected InferModel::configure(const std::string &network_ auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value()); CHECK_EXPECTED(network_groups); + CHECK_AS_EXPECTED(1 == network_groups->size(), HAILO_INVALID_HEF, + "InferModel expects HEF with a single network group. Found {}.", network_groups->size()); + + // TODO (HRT-11293) : Remove this check + TRY(auto internal_queue_size, network_groups.value()[0]->get_min_buffer_pool_size()); + CHECK_AS_EXPECTED(internal_queue_size >= m_config_params.batch_size, HAILO_INVALID_OPERATION, + "Trying to configure a model with a batch={} bigger than internal_queue_size={}, which is not supported. 
Try using a smaller batch.", + m_config_params.batch_size, internal_queue_size); + std::unordered_map inputs_formats; std::unordered_map outputs_formats; @@ -249,6 +269,7 @@ Expected InferModel::configure(const std::string &network_ CHECK_AS_EXPECTED(std::all_of(m_inputs.begin(), m_inputs.end(), [](const auto &input_pair) { return ((input_pair.second.m_pimpl->m_nms_score_threshold == INVALID_NMS_CONFIG) && (input_pair.second.m_pimpl->m_nms_iou_threshold == INVALID_NMS_CONFIG) && + (input_pair.second.m_pimpl->m_nms_max_accumulated_mask_size == static_cast(INVALID_NMS_CONFIG)) && (input_pair.second.m_pimpl->m_nms_max_proposals_per_class == static_cast(INVALID_NMS_CONFIG))); }), HAILO_INVALID_OPERATION, "NMS config was changed for input"); @@ -256,6 +277,7 @@ Expected InferModel::configure(const std::string &network_ auto &edge_name = output_pair.first; if ((output_pair.second.m_pimpl->m_nms_score_threshold == INVALID_NMS_CONFIG) && (output_pair.second.m_pimpl->m_nms_iou_threshold == INVALID_NMS_CONFIG) && + (output_pair.second.m_pimpl->m_nms_max_accumulated_mask_size == static_cast(INVALID_NMS_CONFIG)) && (output_pair.second.m_pimpl->m_nms_max_proposals_per_class == static_cast(INVALID_NMS_CONFIG))) { continue; } @@ -271,25 +293,51 @@ Expected InferModel::configure(const std::string &network_ auto status = network_groups.value()[0]->set_nms_max_bboxes_per_class(edge_name, output_pair.second.m_pimpl->m_nms_max_proposals_per_class); CHECK_SUCCESS_AS_EXPECTED(status); } + if (output_pair.second.m_pimpl->m_nms_max_accumulated_mask_size != static_cast(INVALID_NMS_CONFIG)) { + auto status = network_groups.value()[0]->set_nms_max_accumulated_mask_size(edge_name, output_pair.second.m_pimpl->m_nms_max_accumulated_mask_size); + CHECK_SUCCESS_AS_EXPECTED(status); + } } auto configured_infer_model_pimpl = ConfiguredInferModelImpl::create(network_groups.value()[0], inputs_formats, outputs_formats, - get_input_names(), get_output_names()); + get_input_names(), get_output_names(), m_vdevice); CHECK_EXPECTED(configured_infer_model_pimpl); + // The hef buffer is being used only when working with the service. + // TODO HRT-12636 - Besides clearing the hef buffer, clear also unnecessary members of Hef object. + // After HRT-12636 is done - The user can configure an infer model only once, with or without the service. 
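+    // Clearing it here releases the (potentially large) in-memory copy of the HEF.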
+ m_hef.pimpl->clear_hef_buffer(); + return ConfiguredInferModel(configured_infer_model_pimpl.release()); } Expected InferModel::configure_for_ut(std::shared_ptr async_infer_runner, - const std::vector &input_names, const std::vector &output_names) + const std::vector &input_names, const std::vector &output_names, + std::shared_ptr net_group) { - auto configure_params = m_vdevice.get().create_configure_params(m_hef); - CHECK_EXPECTED(configure_params); + if (nullptr == net_group) { + auto configure_params = m_vdevice.get().create_configure_params(m_hef); + CHECK_EXPECTED(configure_params); - auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value()); - CHECK_EXPECTED(network_groups); + for (auto &network_group_name_params_pair : *configure_params) { + for (auto &stream_params_name_pair : network_group_name_params_pair.second.stream_params_by_name) { + stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; + } - auto configured_infer_model_pimpl = ConfiguredInferModelImpl::create_for_ut(network_groups.value()[0], async_infer_runner, input_names, output_names); + for (auto &network_name_params_pair : network_group_name_params_pair.second.network_params_by_name) { + network_name_params_pair.second.batch_size = m_config_params.batch_size; + } + + network_group_name_params_pair.second.power_mode = m_config_params.power_mode; + network_group_name_params_pair.second.latency = m_config_params.latency; + } + + auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value()); + CHECK_EXPECTED(network_groups); + net_group = network_groups.value()[0]; + } + + auto configured_infer_model_pimpl = ConfiguredInferModelImpl::create_for_ut(net_group, async_infer_runner, input_names, output_names); CHECK_EXPECTED(configured_infer_model_pimpl); return ConfiguredInferModel(configured_infer_model_pimpl.release()); @@ -378,9 +426,9 @@ Expected ConfiguredInferModel::run_async(ConfiguredInferModel::Bi return m_pimpl->run_async(bindings, callback); } -Expected ConfiguredInferModel::get_hw_latency_measurement(const std::string &network_name) +Expected ConfiguredInferModel::get_hw_latency_measurement() { - return m_pimpl->get_hw_latency_measurement(network_name); + return m_pimpl->get_hw_latency_measurement(); } hailo_status ConfiguredInferModel::set_scheduler_timeout(const std::chrono::milliseconds &timeout) @@ -403,14 +451,31 @@ Expected ConfiguredInferModel::get_async_queue_size() return m_pimpl->get_async_queue_size(); } +void ConfiguredInferModel::shutdown() +{ + m_pimpl->abort(); +} + Expected> ConfiguredInferModelImpl::create(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, - const std::vector &input_names, const std::vector &output_names, const uint32_t timeout) + const std::vector &input_names, const std::vector &output_names, VDevice &vdevice, const uint32_t timeout) { auto async_infer_runner = AsyncInferRunnerImpl::create(net_group, inputs_formats, outputs_formats, timeout); CHECK_EXPECTED(async_infer_runner); + auto &hw_elem = async_infer_runner.value()->get_async_pipeline()->get_async_hw_element(); + for (auto &pool : hw_elem->get_hw_interacted_buffer_pools_h2d()) { + if (!pool->is_holding_user_buffers()) { + CHECK_SUCCESS_AS_EXPECTED(pool->map_to_vdevice(vdevice, HAILO_DMA_BUFFER_DIRECTION_H2D)); + } + } + for (auto &pool : hw_elem->get_hw_interacted_buffer_pools_d2h()) { + if (!pool->is_holding_user_buffers()) { + CHECK_SUCCESS_AS_EXPECTED(pool->map_to_vdevice(vdevice, 
HAILO_DMA_BUFFER_DIRECTION_D2H)); + } + } + + auto configured_infer_model_pimpl = make_shared_nothrow(net_group, async_infer_runner.release(), input_names, output_names); + CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_pimpl, HAILO_OUT_OF_HOST_MEMORY); @@ -527,14 +592,48 @@ hailo_status ConfiguredInferModelImpl::run(ConfiguredInferModel::Bindings bindin hailo_status ConfiguredInferModelImpl::validate_bindings(ConfiguredInferModel::Bindings bindings) { for (const auto &input_name : m_input_names) { - if (BufferType::VIEW == bindings.input(input_name)->m_pimpl->get_type()) { - CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_buffer()); - } else { - CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_pix_buffer()); + auto buffer_type = bindings.input(input_name)->m_pimpl->get_type(); + switch (buffer_type) { + case BufferType::VIEW: + { + CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_buffer()); + break; + } + case BufferType::PIX_BUFFER: + { + CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_pix_buffer()); + break; + } + case BufferType::DMA_BUFFER: + { + CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_dma_buffer()); + break; + } + default: + CHECK(false, HAILO_NOT_FOUND, "Couldn't find input buffer for '{}'", input_name); } } for (const auto &output_name : m_output_names) { - CHECK_EXPECTED_AS_STATUS(bindings.output(output_name)->get_buffer()); + auto buffer_type = bindings.output(output_name)->m_pimpl->get_type(); + switch (buffer_type) { + case BufferType::VIEW: + { + CHECK_EXPECTED_AS_STATUS(bindings.output(output_name)->get_buffer()); + break; + } + case BufferType::PIX_BUFFER: + { + CHECK(false, HAILO_NOT_SUPPORTED, "pix_buffer isn't supported for outputs in '{}'", output_name); + break; + } + case BufferType::DMA_BUFFER: + { + CHECK_EXPECTED_AS_STATUS(bindings.output(output_name)->get_dma_buffer()); + break; + } + default: + CHECK(false, HAILO_NOT_FOUND, "Couldn't find output buffer for '{}'", output_name); + } } return HAILO_SUCCESS; @@ -547,62 +646,40 @@ Expected ConfiguredInferModelImpl::run_async(ConfiguredInferModel auto job_pimpl = make_shared_nothrow(static_cast(m_input_names.size() + m_output_names.size())); CHECK_NOT_NULL_AS_EXPECTED(job_pimpl, HAILO_OUT_OF_HOST_MEMORY); - AsyncInferJob job(job_pimpl); TransferDoneCallbackAsyncInfer transfer_done = [this, bindings, job_pimpl, callback](hailo_status status) { bool should_call_callback = job_pimpl->stream_done(status); if (should_call_callback) { - { - std::unique_lock lock(m_mutex); - m_ongoing_parallel_transfers--; - } - m_cv.notify_all(); - auto final_status = (m_async_infer_runner->get_pipeline_status() == HAILO_SUCCESS) ? 
job_pimpl->completion_status() : m_async_infer_runner->get_pipeline_status(); - AsyncInferCompletionInfo completion_info(bindings, final_status); + AsyncInferCompletionInfo completion_info(final_status); callback(completion_info); job_pimpl->mark_callback_done(); - } - }; - for (const auto &input_name : m_input_names) { - auto buff_type = bindings.input(input_name)->m_pimpl->get_type(); - if (BufferType::VIEW == buff_type) { - auto buffer = bindings.input(input_name)->get_buffer(); - CHECK_EXPECTED(buffer, "Couldnt find input buffer for '{}'", input_name); - m_async_infer_runner->set_input(input_name, buffer.release(), transfer_done); - } else if (BufferType::PIX_BUFFER == buff_type) { - auto buffer = bindings.input(input_name)->get_pix_buffer(); - CHECK_EXPECTED(buffer, "Couldnt find input buffer for '{}'", input_name); - m_async_infer_runner->set_input(input_name, buffer.release(), transfer_done); - } else { - CHECK_AS_EXPECTED(false, HAILO_NOT_FOUND, "Couldnt find input buffer for '{}'", input_name); + { + std::unique_lock lock(m_mutex); + m_ongoing_parallel_transfers--; + } + m_cv.notify_all(); } - } - - for (const auto &output_name : m_output_names) { - auto buffer = bindings.output(output_name)->get_buffer(); - CHECK_EXPECTED(buffer, "Couldnt find output buffer for '{}'", output_name); - m_async_infer_runner->set_output(output_name, buffer.release(), transfer_done); - } + }; { std::unique_lock lock(m_mutex); - auto status = m_async_infer_runner->async_infer(); + auto status = m_async_infer_runner->run(bindings, transfer_done); CHECK_SUCCESS_AS_EXPECTED(status); m_ongoing_parallel_transfers++; } - m_cv.notify_all(); + AsyncInferJob job(job_pimpl); return job; } -Expected ConfiguredInferModelImpl::get_hw_latency_measurement(const std::string &network_name) +Expected ConfiguredInferModelImpl::get_hw_latency_measurement() { - return m_cng->get_latency_measurement(network_name); + return m_cng->get_latency_measurement(); } hailo_status ConfiguredInferModelImpl::set_scheduler_timeout(const std::chrono::milliseconds &timeout) @@ -683,7 +760,7 @@ hailo_status AsyncInferJob::Impl::wait(std::chrono::milliseconds timeout) bool was_successful = m_cv.wait_for(lock, timeout, [this] () -> bool { return (m_callback_called); }); - CHECK(was_successful, HAILO_TIMEOUT, "Waiting for async job to finish has failed with timeout {}!", timeout.count()); + CHECK(was_successful, HAILO_TIMEOUT, "Waiting for async job to finish has failed with timeout ({}ms)", timeout.count()); return HAILO_SUCCESS; } @@ -762,7 +839,7 @@ hailo_status ConfiguredInferModel::Bindings::InferStream::Impl::set_buffer(Memor return HAILO_SUCCESS; } -Expected ConfiguredInferModel::Bindings::InferStream::Impl::get_buffer() +Expected ConfiguredInferModel::Bindings::InferStream::Impl::get_buffer() const { CHECK_AS_EXPECTED(BufferType::VIEW == m_buffer_type, HAILO_INVALID_OPERATION, "Trying to get buffer as view for '{}', while it is not configured as view", m_name); @@ -785,6 +862,22 @@ Expected ConfiguredInferModel::Bindings::InferStream::Impl:: return cp; } +hailo_status ConfiguredInferModel::Bindings::InferStream::Impl::set_dma_buffer(hailo_dma_buffer_t dma_buffer) +{ + m_buffer_type = BufferType::DMA_BUFFER; + m_dma_buffer = dma_buffer; + + return HAILO_SUCCESS; +} + +Expected ConfiguredInferModel::Bindings::InferStream::Impl::get_dma_buffer() +{ + CHECK_AS_EXPECTED(BufferType::DMA_BUFFER == m_buffer_type, HAILO_INVALID_OPERATION, + "Trying to get buffer as dma_buffer for '{}', while it is not configured as dma_buffer", m_name); + auto 
cp = m_dma_buffer; + return cp; +} + BufferType ConfiguredInferModel::Bindings::InferStream::Impl::get_type() { return m_buffer_type; @@ -809,6 +902,11 @@ hailo_status ConfiguredInferModel::Bindings::InferStream::set_pix_buffer(const h return m_pimpl->set_pix_buffer(pix_buffer); } +hailo_status ConfiguredInferModel::Bindings::InferStream::set_dma_buffer(hailo_dma_buffer_t dma_buffer) +{ + return m_pimpl->set_dma_buffer(dma_buffer); +} + Expected ConfiguredInferModel::Bindings::InferStream::get_buffer() { return m_pimpl->get_buffer(); @@ -819,4 +917,9 @@ Expected ConfiguredInferModel::Bindings::InferStream::get_pi return m_pimpl->get_pix_buffer(); } +Expected ConfiguredInferModel::Bindings::InferStream::get_dma_buffer() +{ + return m_pimpl->get_dma_buffer(); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp index bdea8116..7f40dead 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp @@ -23,9 +23,11 @@ class ConfiguredInferModel::Bindings::InferStream::Impl public: Impl(const hailo_vstream_info_t &vstream_info); hailo_status set_buffer(MemoryView view); - Expected get_buffer(); + Expected get_buffer() const; hailo_status set_pix_buffer(const hailo_pix_buffer_t &pix_buffer); Expected get_pix_buffer(); + hailo_status set_dma_buffer(hailo_dma_buffer_t dma_buffer); + Expected get_dma_buffer(); BufferType get_type(); void set_stream_callback(TransferDoneCallbackAsyncInfer callback); @@ -36,6 +38,7 @@ class ConfiguredInferModel::Bindings::InferStream::Impl union { MemoryView m_view; hailo_pix_buffer_t m_pix_buffer; + hailo_dma_buffer_t m_dma_buffer; }; TransferDoneCallbackAsyncInfer m_stream_callback; }; @@ -45,8 +48,10 @@ class InferModel::InferStream::Impl public: Impl(const hailo_vstream_info_t &vstream_info) : m_vstream_info(vstream_info), m_user_buffer_format(vstream_info.format), m_nms_score_threshold(static_cast(INVALID_NMS_CONFIG)), m_nms_iou_threshold(static_cast(INVALID_NMS_CONFIG)), - m_nms_max_proposals_per_class(static_cast(INVALID_NMS_CONFIG)) - {} + m_nms_max_proposals_per_class(static_cast(INVALID_NMS_CONFIG)), m_nms_max_accumulated_mask_size(static_cast(INVALID_NMS_CONFIG)) + { + m_user_buffer_format.flags = HAILO_FORMAT_FLAGS_NONE; // Init user's format flags to NONE for transposed models + } std::string name() const; hailo_3d_image_shape_t shape() const; @@ -61,6 +66,7 @@ class InferModel::InferStream::Impl void set_nms_score_threshold(float32_t threshold); void set_nms_iou_threshold(float32_t threshold); void set_nms_max_proposals_per_class(uint32_t max_proposals_per_class); + void set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size); private: friend class InferModel; @@ -71,6 +77,7 @@ class InferModel::InferStream::Impl float32_t m_nms_score_threshold; float32_t m_nms_iou_threshold; uint32_t m_nms_max_proposals_per_class; + uint32_t m_nms_max_accumulated_mask_size; }; class AsyncInferJob::Impl @@ -95,7 +102,8 @@ class ConfiguredInferModelImpl public: static Expected> create(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, - const std::vector &input_names, const std::vector &output_names, const uint32_t timeout = HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); + const std::vector &input_names, const std::vector &output_names, VDevice &vdevice, + const uint32_t timeout = 
HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); ConfiguredInferModelImpl(std::shared_ptr cng, std::shared_ptr async_infer_runner, @@ -110,7 +118,7 @@ class ConfiguredInferModelImpl hailo_status run(ConfiguredInferModel::Bindings bindings, std::chrono::milliseconds timeout); Expected run_async(ConfiguredInferModel::Bindings bindings, std::function callback); - Expected get_hw_latency_measurement(const std::string &network_name); + Expected get_hw_latency_measurement(); hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout); hailo_status set_scheduler_threshold(uint32_t threshold); hailo_status set_scheduler_priority(uint8_t priority); diff --git a/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp index 0cc4ad60..46d45829 100644 --- a/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp @@ -218,7 +218,7 @@ hailo_status InferVStreams::infer(const std::map& input auto status = input_vstream.write(MemoryView::create_const( input_buffer.data() + offset, input_vstream.get_frame_size())); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__DEBUG("Input stream was aborted!"); return status; } @@ -249,7 +249,7 @@ hailo_status InferVStreams::infer(const std::map& input auto error_status = HAILO_SUCCESS; for (auto& result : results) { status = result->get(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { continue; } if (HAILO_SUCCESS != status) { @@ -389,4 +389,19 @@ hailo_status InferVStreams::set_nms_max_proposals_per_class(uint32_t max_proposa return HAILO_SUCCESS; } +hailo_status InferVStreams::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) +{ + auto has_mask_output = false; + for (auto &output_vstream : m_outputs) { + if (HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == output_vstream.get_info().format.order) { + has_mask_output = true; + CHECK_SUCCESS(output_vstream.set_nms_max_accumulated_mask_size(max_accumulated_mask_size)); + } + } + CHECK(has_mask_output, HAILO_INVALID_OPERATION, + "'set_nms_max_accumulated_mask_size()' is called, but there is no NMS WITH BYTE MASK output in this model."); + + return HAILO_SUCCESS; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp new file mode 100644 index 00000000..54260eef --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp @@ -0,0 +1,993 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file multi_io_elements.cpp + * @brief Implementation of the elements with multiple inputs/outputs + **/ + +#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/multi_io_elements.hpp" + +namespace hailort +{ + +BaseMuxElement::BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, hailo_status &status) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout) +{ + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + m_sinks.reserve(sink_count); + for (uint32_t i = 0; i < sink_count; ++i) { + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + m_sink_name_to_index[m_sinks[i].name()] = i; + } + m_barrier = make_shared_nothrow(sink_count); + if (nullptr == m_barrier) { + status = HAILO_OUT_OF_HOST_MEMORY; + return; + } + status = HAILO_SUCCESS; +} + +std::vector BaseMuxElement::execution_pads() +{ + if (m_next_pads.size() == 0) { + if (PipelineDirection::PUSH == m_pipeline_direction) { + m_next_pads.reserve(m_sources.size()); + for (auto &source : m_sources) { + m_next_pads.push_back(source.next()); + } + } else { + m_next_pads.reserve(m_sinks.size()); + for (auto &sink : m_sinks) { + m_next_pads.push_back(sink.prev()); + } + } + } + return m_next_pads; +} + +hailo_status BaseMuxElement::execute_terminate(hailo_status error_status) +{ + if (m_is_terminated) { + return HAILO_SUCCESS; + } + + auto terminate_status = PipelineElement::execute_terminate(error_status); + + { + // Ensuring nothing currently runs + std::unique_lock lock(m_mutex); + } + m_barrier->terminate(); + + CHECK_SUCCESS(terminate_status); + + return HAILO_SUCCESS; +} + + +hailo_status BaseMuxElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +void BaseMuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + assert(PipelineDirection::PUSH == m_pipeline_direction); + assert(m_next_pads.size() == 1); + + m_barrier->arrive_and_wait(); + if (HAILO_SUCCESS == m_pipeline_status->load()) { + std::unique_lock lock(m_mutex); + m_input_buffers[sink.name()] = std::move(buffer); + if (m_input_buffers.size() == m_sink_name_to_index.size()) { // Last sink to set its buffer + + for (auto &input_buffer : m_input_buffers) { + if (HAILO_SUCCESS != input_buffer.second.action_status()) { + handle_non_recoverable_async_error(input_buffer.second.action_status()); + m_input_buffers.clear(); + m_barrier->terminate(); + return; + } + } + + std::vector input_buffers(m_input_buffers.size()); + for (auto &input_buffer : m_input_buffers) { + input_buffers[m_sink_name_to_index[input_buffer.first]] = std::move(input_buffer.second); + } + + auto output = action(std::move(input_buffers), PipelineBuffer()); + if (HAILO_SUCCESS == output.status()) { + m_next_pads[0]->run_push_async(output.release()); + } else { + m_next_pads[0]->run_push_async(PipelineBuffer(output.status())); + } + + m_input_buffers.clear(); + } + } +} + +Expected BaseMuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "BaseMuxElement {} does not support run_pull
operation", name()); + std::vector inputs; + inputs.reserve(m_sinks.size()); + for (auto &sink : m_sinks) { + auto buffer = sink.prev()->run_pull(); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer); + + inputs.push_back(buffer.release()); + } + + auto output = action(std::move(inputs), std::move(optional)); + CHECK_EXPECTED(output); + + return output; +} + +Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + assert(nms_op->outputs_metadata().size() == 1); + auto vstream_info = nms_op->metadata()->get_output_vstream_info(); + CHECK_EXPECTED(vstream_info); + + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + hailo_status status = HAILO_UNINITIALIZED; + auto nms_elem_ptr = make_shared_nothrow(nms_op, name, timeout, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, status); + CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", nms_elem_ptr->description()); + return nms_elem_ptr; +} + +Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return NmsPostProcessMuxElement::create(nms_op, name, build_params.timeout, + build_params.elem_stats_flags, + build_params.pipeline_status, pipeline_direction, async_pipeline); +} + +Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, + const std::string &name, const hailo_vstream_params_t &vstream_params, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return NmsPostProcessMuxElement::create(nms_op, name, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.pipeline_elements_stats_flags, + pipeline_status, pipeline_direction, async_pipeline); +} + +NmsPostProcessMuxElement::NmsPostProcessMuxElement(std::shared_ptr nms_op, + const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, hailo_status &status) : + BaseMuxElement(nms_op->inputs_metadata().size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), + pipeline_direction, async_pipeline, status), + m_nms_op(nms_op) +{} + +Expected NmsPostProcessMuxElement::action(std::vector &&input_buffers, PipelineBuffer &&optional) +{ + std::map inputs; + std::map outputs; + for (size_t i = 0; i < input_buffers.size(); ++i) { + inputs.insert({m_sinks_names[i], input_buffers[i].as_view()}); + } + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto acquired_buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { + return make_unexpected(acquired_buffer.status()); + } + + if (!acquired_buffer) { + for (auto &input : input_buffers) { + input.set_action_status(acquired_buffer.status()); + } + } + CHECK_EXPECTED(acquired_buffer); + outputs.insert({"", acquired_buffer->as_view()}); // TODO: 
fill with correct name + m_duration_collector.start_measurement(); + + auto post_process_result = m_nms_op->execute(inputs, outputs); + m_duration_collector.complete_measurement(); + + for (auto &input : input_buffers) { + input.set_action_status(post_process_result); + } + acquired_buffer->set_action_status(post_process_result); + + if (post_process_result != HAILO_INSUFFICIENT_BUFFER) { + // In YOLOv5-Seg there is an option for the user to change the frame size. + // Therefore we want to return an error status if the buffer is not big enough for all the detections found. + // We return the actual buffer and the error status, + // so the user will be able to choose whether to change the frame_size or ignore the rest of the detections. + CHECK_SUCCESS_AS_EXPECTED(post_process_result); + } + return acquired_buffer; +} + +std::string NmsPostProcessMuxElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_nms_op->metadata()->get_op_description() << ")"; + return element_description.str(); +} + +static hailo_nms_info_t fuse_nms_info(const std::vector &nms_infos) +{ + hailo_nms_info_t fused_info = nms_infos[0]; + fused_info.is_defused = false; + fused_info.number_of_classes = 0; + for (const auto &nms_info : nms_infos) { + fused_info.number_of_classes += nms_info.number_of_classes; + assert(nms_infos[0].max_bboxes_per_class == nms_info.max_bboxes_per_class); + assert(nms_infos[0].bbox_size == nms_info.bbox_size); + assert(nms_infos[0].chunks_per_frame == nms_info.chunks_per_frame); + assert(nms_infos[0].burst_size == nms_info.burst_size); + assert(nms_infos[0].burst_type == nms_info.burst_type); + } + return fused_info; +} + +Expected> NmsMuxElement::create(const std::vector &nms_infos, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + const auto &fused_info = fuse_nms_info(nms_infos); + + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto status = HAILO_UNINITIALIZED; + auto nms_elem_ptr = make_shared_nothrow(nms_infos, fused_info, name, timeout, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, status); + CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", nms_elem_ptr->description()); + + return nms_elem_ptr; +} + +Expected> NmsMuxElement::create(const std::vector &nms_infos, const std::string &name, + const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return NmsMuxElement::create(nms_infos, name, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.pipeline_elements_stats_flags, pipeline_status, pipeline_direction, + async_pipeline); +} + +Expected> NmsMuxElement::create(const std::vector &nms_infos, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return NmsMuxElement::create(nms_infos, name, build_params.timeout, build_params.elem_stats_flags, + build_params.pipeline_status, pipeline_direction, async_pipeline); +} + +NmsMuxElement::NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, + const std::string &name, 
std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, hailo_status &status) : + BaseMuxElement(nms_infos.size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, + async_pipeline, status), + m_nms_infos(nms_infos), + m_fused_nms_info(fused_nms_info) +{} + +const hailo_nms_info_t &NmsMuxElement::get_fused_nms_info() const +{ + return m_fused_nms_info; +} + +Expected NmsMuxElement::action(std::vector &&inputs, PipelineBuffer &&optional) +{ + std::vector input_views; + + input_views.reserve(inputs.size()); + for (auto &input_buf : inputs) { + input_views.push_back(input_buf.as_view()); + } + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto acquired_buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { + return make_unexpected(acquired_buffer.status()); + } + + if (!acquired_buffer) { + for (auto &input : inputs) { + input.set_action_status(acquired_buffer.status()); + } + } + CHECK_AS_EXPECTED(HAILO_TIMEOUT != acquired_buffer.status(), HAILO_TIMEOUT, + "{} failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); + CHECK_EXPECTED(acquired_buffer); + + m_duration_collector.start_measurement(); + const auto status = fuse_buffers(input_views, m_nms_infos, acquired_buffer.value().as_view()); + m_duration_collector.complete_measurement(); + + for (auto &input : inputs) { + input.set_action_status(status); + } + acquired_buffer->set_action_status(status); + + CHECK_SUCCESS_AS_EXPECTED(status); + + return acquired_buffer.release(); +} + +BaseDemuxElement::BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout), + m_is_activated(false), + m_was_stream_aborted(false), + m_source_name_to_index(), + m_was_source_called(source_count, false), + m_buffers_for_action() +{ + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + m_sources.reserve(source_count); + for (uint32_t i = 0; i < source_count; i++) { + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + m_source_name_to_index[m_sources[i].name()] = i; + } +} + +hailo_status BaseDemuxElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "BaseDemuxElement {} does not support run_push operation", name()); + + auto outputs = action(std::move(buffer)); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { + return outputs.status(); + } + CHECK_EXPECTED_AS_STATUS(outputs); + + for (const auto &pad : execution_pads()) { + assert(m_source_name_to_index.count(pad->prev()->name()) > 0); + auto source_index = m_source_name_to_index[pad->prev()->name()]; + auto status = pad->run_push(std::move(outputs.value()[source_index])); + + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("run_push of {} was shutdown!", name()); + return status; + } + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + 
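+        // Shutdown and abort are expected teardown statuses and are returned as-is above;
+        // any other failure from the downstream pad is treated as a real pipeline error.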
CHECK_SUCCESS(status); + } + + return HAILO_SUCCESS; +} + +void BaseDemuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + assert(PipelineDirection::PUSH == m_pipeline_direction); + if (HAILO_SUCCESS != buffer.action_status()) { + for (const auto &pad : execution_pads()) { + auto source_index = m_source_name_to_index[pad->prev()->name()]; + auto pool = m_sources[source_index].next()->element().get_buffer_pool(); + assert(pool); + + auto acquired_buffer = pool->acquire_buffer(m_timeout); + if (HAILO_SUCCESS == acquired_buffer.status()) { + acquired_buffer->set_action_status(buffer.action_status()); + pad->run_push_async(acquired_buffer.release()); + } else { + handle_non_recoverable_async_error(acquired_buffer.status()); + } + } + return; + } + + auto outputs = action(std::move(buffer)); + + for (const auto &pad : execution_pads()) { + assert(m_source_name_to_index.count(pad->prev()->name()) > 0); + auto source_index = m_source_name_to_index[pad->prev()->name()]; + if (HAILO_SUCCESS == outputs.status()) { + pad->run_push_async(std::move(outputs.value()[source_index])); + } else { + pad->run_push_async(PipelineBuffer(outputs.status())); + } + } +} + +Expected BaseDemuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "BaseDemuxElement {} does not support run_pull operation", name()); + + CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in demux element!"); + + std::unique_lock lock(m_mutex); + if (!m_is_activated) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + + if (m_was_stream_aborted) { + return make_unexpected(HAILO_STREAM_ABORT); + } + + m_was_source_called[m_source_name_to_index[source.name()]] = true; + + if (were_all_srcs_arrived()) { + // If all srcs arrived, execute the demux + auto input = execution_pads()[0]->run_pull(); + if (HAILO_STREAM_ABORT == input.status()) { + LOGGER__INFO("run_pull of demux element was aborted!"); + m_was_stream_aborted = true; + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(input.status()); + } + if (HAILO_SHUTDOWN_EVENT_SIGNALED == input.status()) { + LOGGER__INFO("run_pull of demux element was aborted in {} because pipeline deactivated!", name()); + m_is_activated = false; + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(input.status()); + } + CHECK_EXPECTED(input); + + auto outputs = action(input.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { + LOGGER__INFO("run_pull of demux element was aborted in {} because pipeline deactivated!", name()); + m_is_activated = false; + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(outputs.status()); + } + CHECK_EXPECTED(outputs); + + m_buffers_for_action = outputs.release(); + + for (uint32_t i = 0; i < m_was_source_called.size(); i++) { + m_was_source_called[i] = false; + } + + // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again + lock.unlock(); + m_cv.notify_all(); + } else { + // If not all srcs arrived, wait until m_was_source_called is false (set to false after the demux execution) + auto wait_successful = m_cv.wait_for(lock, m_timeout, [&](){ + return !m_was_source_called[m_source_name_to_index[source.name()]] || m_was_stream_aborted || !m_is_activated; + }); + CHECK_AS_EXPECTED(wait_successful, HAILO_TIMEOUT, "Waiting for other threads in demux {} has reached a timeout (timeout={}ms)", 
name(), m_timeout.count()); + + if (m_was_stream_aborted) { + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(HAILO_STREAM_ABORT); + } + + // We check if the element is not activated in case notify_all() was called from deactivate() + if (!m_is_activated) { + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + } + + assert(m_source_name_to_index[source.name()] < m_buffers_for_action.size()); + return std::move(m_buffers_for_action[m_source_name_to_index[source.name()]]); +} + +bool BaseDemuxElement::were_all_srcs_arrived() +{ + return std::all_of(m_was_source_called.begin(), m_was_source_called.end(), [](bool v) { return v; }); +} + +hailo_status BaseDemuxElement::execute_activate() +{ + if (m_is_activated) { + return HAILO_SUCCESS; + } + m_is_activated = true;// TODO Should this always be true, no matter the status of source().activate()? + m_was_stream_aborted = false; + + return PipelineElementInternal::execute_activate(); +} + +hailo_status BaseDemuxElement::execute_deactivate() +{ + if (!m_is_activated) { + return HAILO_SUCCESS; + } + m_is_activated = false; + + // deactivate should be called before mutex acquire and notify_all because it is possible that all queues are waiting on + // the run_pull of the source (HwRead) and the mutex is already acquired so this would prevent a timeout error + hailo_status status = PipelineElementInternal::execute_deactivate(); + + { + // There is a case where the other thread is halted (via context switch) before the wait_for() function, + // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. + // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. + std::unique_lock lock(m_mutex); + } + m_cv.notify_all(); + + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status BaseDemuxElement::execute_post_deactivate(bool should_clear_abort) +{ + for (uint32_t i = 0; i < m_was_source_called.size(); i++) { + m_was_source_called[i] = false; + } + return PipelineElementInternal::execute_post_deactivate(should_clear_abort); +} + +hailo_status BaseDemuxElement::execute_abort() +{ + auto status = PipelineElementInternal::execute_abort(); + CHECK_SUCCESS(status); + { + // There is a case where the other thread is halted (via context switch) before the wait_for() function, + // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. + // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. 
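A note on the empty lock scope that follows this comment (and the identical one in execute_deactivate() above): taking and immediately releasing m_mutex before notify_all() closes the window in which a waiter has evaluated its wait_for() predicate but has not yet blocked, which is how a notification can otherwise be lost. A generic sketch of the pattern (textbook form with the flag set under the lock; not HailoRT code):

```cpp
#include <condition_variable>
#include <mutex>
#include <thread>

std::mutex m;
std::condition_variable cv;
bool stop = false;

void waiter()
{
    std::unique_lock<std::mutex> lock(m);
    // The predicate is re-checked under the lock, so a notify that happens
    // while the waiter holds the lock cannot be lost.
    cv.wait(lock, [] { return stop; });
}

void stopper()
{
    {
        // Writing the flag under the same mutex orders it with the waiter's
        // predicate check - the same role the empty lock scope plays above.
        std::lock_guard<std::mutex> lock(m);
        stop = true;
    }
    cv.notify_all();
}

int main()
{
    std::thread t(waiter);
    stopper();
    t.join();
}
```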
+ std::unique_lock lock(m_mutex); + } + m_cv.notify_all(); + + return HAILO_SUCCESS; +} + +hailo_status BaseDemuxElement::set_timeout(std::chrono::milliseconds timeout) +{ + m_timeout = timeout; + return HAILO_SUCCESS; +} + +Expected BaseDemuxElement::get_source_index_from_source_name(const std::string &source_name) +{ + CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND); + auto ret_val = m_source_name_to_index.at(source_name); + return ret_val; +} + +std::vector BaseDemuxElement::execution_pads() +{ + if (m_next_pads.size() == 0) + { + if (PipelineDirection::PUSH == m_pipeline_direction) { + m_next_pads.reserve(m_sources.size()); + for (auto &source : m_sources ) { + m_next_pads.push_back(source.next()); + } + } else { + m_next_pads.reserve(m_sinks.size()); + for (auto &sink : m_sinks ) { + m_next_pads.push_back(sink.prev()); + } + } + } + return m_next_pads; +} + +Expected> TransformDemuxElement::create(std::shared_ptr demuxer, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + + auto demux_elem_ptr = make_shared_nothrow(demuxer, name, timeout, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != demux_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return demux_elem_ptr; +} + +Expected> TransformDemuxElement::create(std::shared_ptr demuxer, + const std::string &name, const ElementBuildParams &build_params, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return TransformDemuxElement::create(demuxer, name, build_params.timeout, build_params.elem_stats_flags, + build_params.pipeline_status, pipeline_direction, async_pipeline); +} + +TransformDemuxElement::TransformDemuxElement(std::shared_ptr demuxer, + const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + BaseDemuxElement(demuxer->get_edges_stream_info().size(), name, timeout, std::move(duration_collector), + std::move(pipeline_status), pipeline_direction, async_pipeline), + m_demuxer(demuxer) +{} + +Expected> TransformDemuxElement::action(PipelineBuffer &&input) +{ + std::vector outputs; + std::vector raw_buffers; + + auto mux_edges = m_demuxer->get_edges_stream_info(); + outputs.reserve(mux_edges.size()); + raw_buffers.reserve(mux_edges.size()); + + for (uint32_t i = 0; i < mux_edges.size(); i++) { + + auto pool = m_sources[i].next()->element().get_buffer_pool(); + assert(pool); + + auto acquired_buffer = pool->acquire_buffer(m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { + return make_unexpected(acquired_buffer.status()); + } + + if (!acquired_buffer) { + input.set_action_status(acquired_buffer.status()); + } + CHECK_EXPECTED(acquired_buffer, "Failed to acquire buffer"); + outputs.emplace_back(acquired_buffer.release()); + raw_buffers.push_back(outputs.back().as_view()); + } + + m_duration_collector.start_measurement(); + const auto status = m_demuxer->transform_demux(input.as_view(), raw_buffers); + m_duration_collector.complete_measurement(); + + input.set_action_status(status); + for (auto &output : outputs) { + output.set_action_status(status); + 
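The set_action_status() calls here implement the pipeline's error-propagation contract: a buffer remembers the status of the action that produced it, and the completion callback attached to the buffer eventually observes that status. A toy illustration of the idea (hypothetical MiniBuffer type, not the real PipelineBuffer):

```cpp
#include <functional>
#include <iostream>

// A miniature status-carrying buffer: whoever produced it records the action
// status, and the completion callback receives it on release.
struct MiniBuffer {
    int action_status = 0; // 0 plays the role of HAILO_SUCCESS
    std::function<void(int)> exec_done;

    ~MiniBuffer()
    {
        if (exec_done) {
            exec_done(action_status); // fired exactly once, on release
        }
    }
};

int main()
{
    MiniBuffer buf;
    buf.exec_done = [](int status) { std::cout << "exec_done, status=" << status << "\n"; };
    buf.action_status = -1; // e.g. a failed transform_demux
} // leaving scope releases the buffer and reports the recorded status
```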
} + + CHECK_SUCCESS_AS_EXPECTED(status); + + return outputs; +} + +PixBufferElement::PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + hailo_format_order_t order, std::shared_ptr async_pipeline) : + BaseDemuxElement(((order == HAILO_FORMAT_ORDER_I420) ? NUMBER_OF_PLANES_I420 : NUMBER_OF_PLANES_NV12_NV21), + name, timeout, std::move(duration_collector), std::move(pipeline_status), + PipelineDirection::PUSH, async_pipeline), + m_order(order) +{} + +Expected> PixBufferElement::create(const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline) +{ + auto pix_buffer_splitter_elem_ptr = make_shared_nothrow(name, timeout, + std::move(duration_collector), std::move(pipeline_status), order, async_pipeline); + CHECK_AS_EXPECTED(nullptr != pix_buffer_splitter_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + return pix_buffer_splitter_elem_ptr; +} + +Expected> PixBufferElement::action(PipelineBuffer &&input) +{ + // splits the planes into buffers + m_duration_collector.start_measurement(); + std::vector outputs; + + auto input_pix_buffer_expected = input.as_hailo_pix_buffer(m_order); + + if (!input_pix_buffer_expected) { + input.set_action_status(input_pix_buffer_expected.status()); + } + CHECK_EXPECTED(input_pix_buffer_expected); + auto input_pix_buffer = input_pix_buffer_expected.release(); + + if (PipelineBuffer::Type::FLUSH == input.get_type()) { + for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { + outputs.emplace_back(PipelineBuffer(PipelineBuffer::Type::FLUSH)); + } + } else { + auto shared_input_buff = make_shared_nothrow(std::move(input)); + if (!shared_input_buff) { + handle_non_recoverable_async_error(HAILO_OUT_OF_HOST_MEMORY); + } + CHECK_NOT_NULL_AS_EXPECTED(shared_input_buff, HAILO_OUT_OF_HOST_MEMORY); + + for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { + outputs.emplace_back(MemoryView(input_pix_buffer.planes[i].user_ptr, input_pix_buffer.planes[i].bytes_used), + [input_ptr = shared_input_buff](hailo_status status) + { + if (HAILO_SUCCESS != status) { + input_ptr->set_action_status(status); + } + }); + } + } + + m_duration_collector.complete_measurement(); + return outputs; +} + +Expected> AsyncHwElement::create(const std::unordered_map &named_stream_infos, + std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, const std::string &name, + std::shared_ptr> pipeline_status, std::shared_ptr net_group, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto min_buffer_pool_size = net_group->get_min_buffer_pool_size(); + CHECK_EXPECTED(min_buffer_pool_size); + + auto status = HAILO_UNINITIALIZED; + auto elem_ptr = make_shared_nothrow(named_stream_infos, timeout, name, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, net_group, + min_buffer_pool_size.release(), status); + CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + + LOGGER__INFO("Created {}", elem_ptr->description()); + + return elem_ptr; +} + +AsyncHwElement::AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, + const std::string &name, DurationCollector 
&&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, std::shared_ptr net_group, + const size_t max_ongoing_transfers, hailo_status &status) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout), + m_net_group(net_group), + m_max_ongoing_transfers(max_ongoing_transfers) +{ + uint32_t sinks_count = 0; + uint32_t sources_count = 0; + for (const auto &stream_info_pair : named_stream_infos) { + if (HAILO_D2H_STREAM == stream_info_pair.second.direction) { + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + const auto &source_name = m_sources[sources_count++].name(); + m_source_name_to_stream_name[source_name] = stream_info_pair.first; + + m_source_name_to_index[source_name] = static_cast(m_sources.size() - 1); + } else { + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + const auto &sink_name = m_sinks[sinks_count++].name(); + m_sink_name_to_stream_name[sink_name] = stream_info_pair.first; + m_sink_name_to_index[sink_name] = static_cast(m_sinks.size() - 1); + } + } + m_barrier = make_shared_nothrow(sinks_count); + if (nullptr == m_barrier) { + status = HAILO_OUT_OF_HOST_MEMORY; + return; + } + status = HAILO_SUCCESS; +} + +// This func overrides the regular dataflow of this element and calls all next elements run_push_async directly +// (normally, the run_push_async of the next elements will be called by the LL async read_done) +void AsyncHwElement::handle_error_in_hw_async_elem(hailo_status error_status) +{ + for (auto &name_output_stream_pair : m_source_name_to_index) { + auto source_index = name_output_stream_pair.second; + assert(source_index < m_sources.size()); + + auto pool = m_sources[source_index].next()->element().get_buffer_pool(); + assert(pool); + + auto expected_buffer = pool->acquire_buffer(m_timeout); + if (HAILO_SUCCESS == expected_buffer.status()) { + expected_buffer->set_action_status(error_status); + m_sources[source_index].next()->run_push_async(expected_buffer.release()); + } else { + m_sources[source_index].next()->run_push_async(PipelineBuffer(error_status)); + } + } + + return; +} + +void AsyncHwElement::action() +{ + // Assuming m_input_buffers is full (has a valid buffer for all sinks) + for (auto &input_buffer : m_input_buffers) { + if (HAILO_SUCCESS != input_buffer.second.action_status()) { + handle_error_in_hw_async_elem(input_buffer.second.action_status()); + m_input_buffers.clear(); + return; + } + } + + // TODO: HRT-13324 Change to be map of + std::unordered_map> source_name_to_output_buffer; + for (auto &name_to_index_pair : m_source_name_to_index) { + auto pool = m_sources[name_to_index_pair.second].next()->element().get_buffer_pool(); + assert(pool); + + auto expected_buffer = pool->acquire_buffer(m_timeout); + if (HAILO_SUCCESS != expected_buffer.status()) { + handle_non_recoverable_async_error(expected_buffer.status()); + m_input_buffers.clear(); + m_barrier->terminate(); + return; + } + source_name_to_output_buffer[name_to_index_pair.first] = make_shared_nothrow(expected_buffer.release()); + } + + NamedBuffersCallbacks named_buffers_callbacks; + + for (auto &input_buffer : m_input_buffers) { + const auto &stream_name = m_sink_name_to_stream_name.at(input_buffer.first); + // std::function requires its lambda to be copyable, so using shared_ptr + auto buffer_shared = make_shared_nothrow(std::move(input_buffer.second)); + if (nullptr == buffer_shared) { + 
handle_non_recoverable_async_error(HAILO_OUT_OF_HOST_MEMORY);
+            m_input_buffers.clear();
+            m_barrier->terminate();
+            return;
+        }
+        named_buffers_callbacks.emplace(stream_name, std::make_pair(buffer_shared->as_view(),
+            [buffer_shared](hailo_status status) { buffer_shared->set_action_status(status); }));
+    }
+
+    for (auto &output_buffer : source_name_to_output_buffer) {
+        const auto &stream_name = m_source_name_to_stream_name.at(output_buffer.first);
+        named_buffers_callbacks.emplace(stream_name, std::make_pair(output_buffer.second->as_view(),
+            [this, buffer = output_buffer.second, source_name = output_buffer.first](hailo_status status){
+                buffer->set_action_status(status);
+                // If pipeline_status is not success, someone already handled the error and there is no reason to push this buffer
+                if (HAILO_SUCCESS == m_pipeline_status->load()) {
+                    assert(contains(m_source_name_to_index, source_name));
+                    m_sources[m_source_name_to_index[source_name]].next()->run_push_async(std::move(*buffer));
+                }
+            }));
+    }
+
+    auto status = m_net_group->wait_for_ongoing_callbacks_count_under(m_max_ongoing_transfers);
+    if (HAILO_SUCCESS != status) {
+        handle_non_recoverable_async_error(status);
+        m_input_buffers.clear();
+        m_barrier->terminate();
+        return;
+    }
+
+    status = m_net_group->infer_async(named_buffers_callbacks, [](hailo_status){});
+    if (HAILO_SUCCESS != status) {
+        handle_non_recoverable_async_error(status);
+        m_input_buffers.clear();
+        m_barrier->terminate();
+        return;
+    }
+
+    m_input_buffers.clear();
+}
+
+void AsyncHwElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink)
+{
+    assert(contains(m_sink_name_to_index, sink.name()));
+
+    m_barrier->arrive_and_wait();
+    if (HAILO_SUCCESS == m_pipeline_status->load()) {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_input_buffers[sink.name()] = std::move(buffer);
+        if (m_input_buffers.size() == m_sink_name_to_index.size()) { // Last sink to set its buffer
+            action();
+        }
+    }
+}
+
+hailo_status AsyncHwElement::run_push(PipelineBuffer &&/*optional*/, const PipelinePad &/*sink*/)
+{
+    return HAILO_INVALID_OPERATION;
+}
+
+Expected<uint32_t> AsyncHwElement::get_source_index_from_output_stream_name(const std::string &output_stream_name)
+{
+    for (const auto &name_pair : m_source_name_to_stream_name) {
+        if (name_pair.second == output_stream_name) {
+            assert(contains(m_source_name_to_index, name_pair.first));
+            uint32_t ret_val = m_source_name_to_index.at(name_pair.first);
+            return ret_val;
+        }
+    }
+    return make_unexpected(HAILO_NOT_FOUND);
+}
+
+Expected<uint32_t> AsyncHwElement::get_source_index_from_source_name(const std::string &source_name)
+{
+    CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND, "couldn't find source '{}'", source_name);
+    auto ret_val = m_source_name_to_index.at(source_name);
+    return ret_val;
+}
+
+Expected<uint32_t> AsyncHwElement::get_sink_index_from_input_stream_name(const std::string &input_stream_name)
+{
+    for (const auto &name_pair : m_sink_name_to_stream_name) {
+        if (name_pair.second == input_stream_name) {
+            return Expected<uint32_t>(m_sink_name_to_index.at(name_pair.first));
+        }
+    }
+    return make_unexpected(HAILO_INVALID_ARGUMENT);
+}
+
+Expected<PipelineBuffer> AsyncHwElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+{
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
+std::vector<PipelinePad*> AsyncHwElement::execution_pads()
+{
+    std::vector<PipelinePad*> result;
+    result.reserve(m_sources.size());
+    for (auto &pad : m_sources) {
+        result.push_back(pad.next());
+    }
+    return result;
+}
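AsyncHwElement's input side works as a rendezvous: every sink thread arrives at m_barrier in run_push_async(), and only the last sink to deliver its buffer calls action(). A minimal sketch of the same rendezvous using C++20 std::barrier in place of HailoRT's internal Barrier class:

```cpp
#include <barrier>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

int main()
{
    constexpr int sinks = 3;
    std::mutex print_mutex;

    // The completion function runs in exactly one thread once all sinks have
    // arrived - analogous to "the last sink to set its buffer calls action()".
    std::barrier sync(sinks, []() noexcept {
        std::cout << "all inputs present -> launch inference\n";
    });

    std::vector<std::jthread> threads;
    for (int i = 0; i < sinks; i++) {
        threads.emplace_back([&, i] {
            {
                std::lock_guard<std::mutex> lock(print_mutex);
                std::cout << "sink " << i << " delivered its buffer\n";
            }
            sync.arrive_and_wait();
        });
    }
} // jthreads join on destruction
```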
+
+hailo_status AsyncHwElement::execute_terminate(hailo_status error_status)
+{
+    if (m_is_terminated) {
+        return HAILO_SUCCESS;
+    }
+
+    m_barrier->terminate();
+
+    // Checking success of shutdown is best effort (terminate should be called even if shutdown fails)
+    auto shutdown_status = m_net_group->shutdown();
+    auto terminate_status = PipelineElement::execute_terminate(error_status);
+    CHECK_SUCCESS(shutdown_status);
+    CHECK_SUCCESS(terminate_status);
+
+    return HAILO_SUCCESS;
+}
+
+std::vector<BufferPoolPtr> AsyncHwElement::get_hw_interacted_buffer_pools_h2d()
+{
+    std::vector<BufferPoolPtr> res;
+    for (auto &sink : m_sinks) {
+        res.push_back(sink.prev()->element().get_buffer_pool());
+    }
+    return res;
+}
+
+std::vector<BufferPoolPtr> AsyncHwElement::get_hw_interacted_buffer_pools_d2h()
+{
+    std::vector<BufferPoolPtr> res;
+    for (auto &source : m_sources) {
+        res.push_back(source.next()->element().get_buffer_pool());
+    }
+    return res;
+}
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp
new file mode 100644
index 00000000..b7468f1a
--- /dev/null
+++ b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp
@@ -0,0 +1,279 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file multi_io_elements.hpp
+ * @brief All multiple-input/multiple-output elements in the pipeline.
+ **/
+
+#ifndef _HAILO_MULTI_IO_ELEMENTS_HPP_
+#define _HAILO_MULTI_IO_ELEMENTS_HPP_
+
+#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp"
+
+namespace hailort
+{
+
+class BaseMuxElement : public PipelineElementInternal
+{
+public:
+    virtual ~BaseMuxElement() = default;
+
+    virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override;
+    virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override;
+    virtual Expected<PipelineBuffer> run_pull(PipelineBuffer &&optional, const PipelinePad &source) override;
+
+protected:
+    BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout,
+        DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+        PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline,
+        hailo_status &status);
+    virtual hailo_status execute_terminate(hailo_status error_status) override;
+    virtual Expected<PipelineBuffer> action(std::vector<PipelineBuffer> &&inputs, PipelineBuffer &&optional) = 0;
+    virtual std::vector<PipelinePad*> execution_pads() override;
+
+    PipelinePad &next_pad_downstream()
+    {
+        return *m_sources[0].next();
+    }
+
+    std::chrono::milliseconds m_timeout;
+
+private:
+    std::mutex m_mutex;
+    std::unordered_map<std::string, uint32_t> m_sink_name_to_index;
+    std::unordered_map<std::string, PipelineBuffer> m_input_buffers;
+    std::vector<PipelinePad*> m_next_pads;
+    BarrierPtr m_barrier;
+};
+
+class NmsPostProcessMuxElement : public BaseMuxElement
+{
+public:
+    static Expected<std::shared_ptr<NmsPostProcessMuxElement>> create(std::shared_ptr<net_flow::Op> nms_op,
+        const std::string &name, std::chrono::milliseconds timeout,
+        hailo_pipeline_elem_stats_flags_t elem_flags,
+        std::shared_ptr<std::atomic<hailo_status>> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL,
+        std::shared_ptr<AsyncPipeline> async_pipeline = nullptr);
+    static Expected<std::shared_ptr<NmsPostProcessMuxElement>> create(std::shared_ptr<net_flow::Op> nms_op,
+        const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL,
+        std::shared_ptr<AsyncPipeline> async_pipeline = nullptr);
+    static Expected<std::shared_ptr<NmsPostProcessMuxElement>> create(std::shared_ptr<net_flow::Op> nms_op,
+        const std::string
&name, const hailo_vstream_params_t &vstream_params, + std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + NmsPostProcessMuxElement(std::shared_ptr nms_op, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, hailo_status &status); + virtual std::string description() const override; + + void add_sink_name(const std::string &name) // TODO: remove this (HRT-8875) + { + m_sinks_names.push_back(name); + } + + std::shared_ptr get_op() { return m_nms_op; } + + virtual hailo_status set_nms_score_threshold(float32_t threshold) + { + auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().nms_score_th = threshold; + + return HAILO_SUCCESS; + } + + virtual hailo_status set_nms_iou_threshold(float32_t threshold) + { + auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().nms_iou_th = threshold; + + return HAILO_SUCCESS; + } + + virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) + { + auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().max_proposals_per_class = max_proposals_per_class; + + return HAILO_SUCCESS; + } + + virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) + { + auto yolov5seg_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != yolov5seg_metadata); + yolov5seg_metadata->yolov5seg_config().max_accumulated_mask_size = max_accumulated_mask_size; + + return HAILO_SUCCESS; + } + +protected: + virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; + +private: + std::shared_ptr m_nms_op; + std::vector m_sinks_names; // TODO: remove this (HRT-8875) +}; + +class NmsMuxElement : public BaseMuxElement +{ +public: + static Expected> create(const std::vector &nms_infos, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); + static Expected> create(const std::vector &nms_infos, const std::string &name, + const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const std::vector &nms_infos, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, hailo_status &status); + const hailo_nms_info_t &get_fused_nms_info() const; + +protected: + virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; + +private: + std::vector m_nms_infos; + hailo_nms_info_t m_fused_nms_info; +}; + +class BaseDemuxElement : public 
PipelineElementInternal +{ +public: + virtual ~BaseDemuxElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + hailo_status set_timeout(std::chrono::milliseconds timeout); + + virtual Expected get_source_index_from_source_name(const std::string &source_name) override; + +protected: + BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual hailo_status execute_activate() override; + virtual hailo_status execute_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_abort() override; + virtual Expected> action(PipelineBuffer &&input) = 0; + virtual std::vector execution_pads() override; + + std::chrono::milliseconds m_timeout; + +private: + bool were_all_srcs_arrived(); + + std::atomic_bool m_is_activated; + std::atomic_bool m_was_stream_aborted; + std::unordered_map m_source_name_to_index; + std::vector m_was_source_called; + std::vector m_buffers_for_action; + std::mutex m_mutex; + std::condition_variable m_cv; + std::vector m_next_pads; +}; + +class TransformDemuxElement : public BaseDemuxElement +{ +public: + static Expected> create(std::shared_ptr demuxer, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(std::shared_ptr demuxer, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + TransformDemuxElement(std::shared_ptr demuxer, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + +protected: + virtual Expected> action(PipelineBuffer &&input) override; + +private: + std::shared_ptr m_demuxer; +}; + +class PixBufferElement : public BaseDemuxElement +{ +public: + static Expected> create(const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline = nullptr); + + PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline); + +protected: + virtual Expected> action(PipelineBuffer &&input); + hailo_format_order_t m_order; +}; + +// Note: This element does infer - it sends writes to HW and reads the outputs +class AsyncHwElement : public PipelineElementInternal +{ +public: + static Expected> create(const std::unordered_map &named_stream_infos, + std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, const std::string &name, + std::shared_ptr> pipeline_status, + std::shared_ptr net_group, PipelineDirection pipeline_direction = PipelineDirection::PUSH, + 
std::shared_ptr async_pipeline = nullptr); + AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, + const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, std::shared_ptr net_group, + const size_t max_ongoing_transfers, hailo_status &status); + virtual ~AsyncHwElement() = default; + + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + void action(); + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + + Expected get_source_index_from_output_stream_name(const std::string &output_stream_name); + Expected get_sink_index_from_input_stream_name(const std::string &input_stream_name); + virtual Expected get_source_index_from_source_name(const std::string &source_name) override; + + std::vector get_hw_interacted_buffer_pools_h2d(); + std::vector get_hw_interacted_buffer_pools_d2h(); + +protected: + virtual std::vector execution_pads() override; + virtual hailo_status execute_terminate(hailo_status error_status) override; + +private: + void handle_error_in_hw_async_elem(hailo_status error_status); + + std::chrono::milliseconds m_timeout; + std::shared_ptr m_net_group; + size_t m_max_ongoing_transfers; + + std::unordered_map m_sink_name_to_stream_name; + std::unordered_map m_source_name_to_stream_name; + std::unordered_map m_input_buffers; + std::mutex m_mutex; + std::unordered_map m_source_name_to_index; + std::unordered_map m_sink_name_to_index; + BarrierPtr m_barrier; +}; + + + +} /* namespace hailort */ + +#endif /* _HAILO_MULTI_IO_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp index 6360ed86..0dff98fe 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp @@ -13,7 +13,11 @@ #include "hailo/expected.hpp" #include "hailo/hailort.h" +#include "hailo/hailort_common.hpp" +#include "hailo/vdevice.hpp" #include "net_flow/pipeline/pipeline.hpp" +#include "utils/buffer_storage.hpp" + #include namespace hailort @@ -37,10 +41,6 @@ void PipelineBuffer::Metadata::set_start_time(PipelineTimePoint val) m_start_time = val; } -PipelineBuffer::PipelineBuffer() : - PipelineBuffer(Type::DATA) -{} - PipelineBuffer::PipelineBuffer(Type type) : m_type(type), m_pool(nullptr), @@ -70,22 +70,8 @@ PipelineBuffer::PipelineBuffer(hailo_status action_status, const TransferDoneCal }; } -PipelineBuffer::PipelineBuffer(MemoryView view, bool is_user_buffer, BufferPoolPtr pool, bool should_measure, hailo_status action_status) : - m_type(Type::DATA), - m_pool(pool), - m_view(view), - m_metadata(Metadata(add_timestamp(should_measure))), - m_is_user_buffer(is_user_buffer), - m_should_call_exec_done(true), - m_action_status(action_status) -{ - m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer](hailo_status){ - release_buffer(buffer_pool, mem_view, is_user_buffer); - }; -} - -PipelineBuffer::PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done, bool is_user_buffer, BufferPoolPtr pool, bool should_measure, - hailo_status action_status) : +PipelineBuffer::PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done, hailo_status action_status, bool 
is_user_buffer, BufferPoolPtr pool, + bool should_measure) : m_type(Type::DATA), m_pool(pool), m_view(view), @@ -116,20 +102,6 @@ PipelineBuffer::PipelineBuffer(hailo_pix_buffer_t buffer, const TransferDoneCall }; } -PipelineBuffer::PipelineBuffer(hailo_pix_buffer_t buffer) : - m_type(Type::DATA), - m_pool(nullptr), - m_view(), - m_metadata(), - m_is_user_buffer(false), - m_should_call_exec_done(true) -{ - set_additional_data(std::make_shared(buffer)); - m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer](hailo_status){ - release_buffer(buffer_pool, mem_view, is_user_buffer); - }; -} - PipelineBuffer::PipelineBuffer(PipelineBuffer &&other) : m_type(other.m_type), m_pool(std::move(other.m_pool)), @@ -196,43 +168,8 @@ Expected PipelineBuffer::as_hailo_pix_buffer(hailo_format_or auto pix_buffer = get_metadata().get_additional_data(); if (nullptr == pix_buffer) { - switch(order){ - case HAILO_FORMAT_ORDER_NV12: - case HAILO_FORMAT_ORDER_NV21: { - CHECK_AS_EXPECTED(0 == (m_view.size() % 3), HAILO_INVALID_ARGUMENT, "buffer size must be divisible by 3"); - - auto y_plane_size = m_view.size() * 2 / 3; - auto uv_plane_size = m_view.size() * 1 / 3; - - auto uv_data_ptr = reinterpret_cast(m_view.data()) + y_plane_size; - - hailo_pix_buffer_plane_t y {uint32_t(y_plane_size), uint32_t(y_plane_size), m_view.data()}; - hailo_pix_buffer_plane_t uv {uint32_t(uv_plane_size), uint32_t(uv_plane_size), uv_data_ptr}; - hailo_pix_buffer_t buffer{0, {y, uv}, NUMBER_OF_PLANES_NV12_NV21}; - - return buffer; - } - case HAILO_FORMAT_ORDER_I420: { - CHECK_AS_EXPECTED(0 == (m_view.size() % 6), HAILO_INVALID_ARGUMENT, "buffer size must be divisible by 6"); - - auto y_plane_size = m_view.size() * 2 / 3; - auto u_plane_size = m_view.size() * 1 / 6; - auto v_plane_size = m_view.size() * 1 / 6; - - auto u_data_ptr = (char*)m_view.data() + y_plane_size; - auto v_data_ptr = u_data_ptr + u_plane_size; - - hailo_pix_buffer_plane_t y {uint32_t(y_plane_size), uint32_t(y_plane_size), m_view.data()}; - hailo_pix_buffer_plane_t u {uint32_t(u_plane_size), uint32_t(u_plane_size), u_data_ptr}; - hailo_pix_buffer_plane_t v {uint32_t(v_plane_size), uint32_t(v_plane_size), v_data_ptr}; - hailo_pix_buffer_t buffer{0, {y, u, v}, NUMBER_OF_PLANES_I420}; - - return buffer; - } - default: { - CHECK_AS_EXPECTED(false, HAILO_INTERNAL_FAILURE, "unsupported format order"); - } - } + auto mem_view = as_view(); + return HailoRTCommon::as_hailo_pix_buffer(mem_view, order); } else { uint32_t expected_number_of_planes; switch(order){ @@ -262,12 +199,6 @@ void PipelineBuffer::set_metadata(Metadata &&val) m_metadata = std::move(val); } -TransferDoneCallbackAsyncInfer PipelineBuffer::get_exec_done_cb() -{ - m_should_call_exec_done = false; - return m_exec_done; -} - PipelineTimePoint PipelineBuffer::add_timestamp(bool should_measure) { return should_measure ? 
std::chrono::steady_clock::now() : PipelineTimePoint{}; @@ -293,6 +224,14 @@ void PipelineBuffer::set_action_status(hailo_status status) m_action_status = status; } +void PipelineBuffer::call_exec_done() +{ + if (m_should_call_exec_done) { + m_exec_done(action_status()); + m_should_call_exec_done = false; + } +} + Expected BufferPool::create(size_t buffer_size, size_t buffer_count, EventPtr shutdown_event, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, bool is_empty, bool is_dma_able) @@ -345,31 +284,33 @@ BufferPool::BufferPool(size_t buffer_size, bool is_holding_user_buffers, bool me m_buffers(std::move(buffers)), m_free_mem_views(std::move(free_mem_views)), m_done_cbs(std::move(done_cbs)), - m_queue_size_accumulator(std::move(queue_size_accumulator)) + m_queue_size_accumulator(std::move(queue_size_accumulator)), + m_is_already_running(false) { } size_t BufferPool::buffer_size() { - return m_buffer_size; + std::unique_lock lock(m_buffer_size_mutex); + return m_buffer_size.load(); } -hailo_status BufferPool::enqueue_buffer(MemoryView mem_view) +hailo_status BufferPool::enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) { - CHECK(mem_view.size() == m_buffer_size, HAILO_INTERNAL_FAILURE, "Buffer size is not the same as expected for pool! ({} != {})", mem_view.size(), m_buffer_size); + m_is_already_running = true; + auto pool_buffer_size = buffer_size(); + CHECK(mem_view.size() == pool_buffer_size, HAILO_INTERNAL_FAILURE, + "Buffer size is not the same as expected for pool! ({} != {})", mem_view.size(), pool_buffer_size); + std::unique_lock lock(m_enqueue_mutex); auto status = m_free_mem_views.enqueue(mem_view); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } CHECK_SUCCESS(status); - return HAILO_SUCCESS; -} - -hailo_status BufferPool::enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) -{ - auto status = enqueue_buffer(mem_view); - CHECK_SUCCESS(status); - - status = m_done_cbs.enqueue(exec_done); + // TODO: Stop using 2 queues, hold a queue of pipeline_buffer instead. 
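To make the TODO above concrete: m_free_mem_views and m_done_cbs are parallel queues that must stay in lockstep, which is why the callback is only enqueued once the view enqueue has succeeded. A toy model of that pairing under the same ordering rule (plain std::queue stand-ins, illustrative names only):

```cpp
#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <utility>

using Callback = std::function<void(int)>;

// Toy stand-in for the pool's paired queues; not the real HailoRT types.
struct ToyPool {
    std::queue<std::string> free_views; // plays the role of m_free_mem_views
    std::queue<Callback>    done_cbs;   // plays the role of m_done_cbs
    const size_t capacity = 4;

    // The callback is pushed only once the view is committed, so the two
    // queues can never drift apart - the same ordering argument the comment
    // in enqueue_buffer() makes.
    bool enqueue(std::string view, Callback cb)
    {
        if (free_views.size() >= capacity) {
            return false; // reject before touching either queue
        }
        free_views.push(std::move(view));
        done_cbs.push(std::move(cb));
        return true;
    }

    // Dequeue pops from both queues together, preserving the pairing.
    std::pair<std::string, Callback> acquire()
    {
        auto view = std::move(free_views.front()); free_views.pop();
        auto cb = std::move(done_cbs.front());     done_cbs.pop();
        return {std::move(view), std::move(cb)};
    }
};

int main()
{
    ToyPool pool;
    pool.enqueue("buffer-0", [](int status) { std::cout << "done cb, status=" << status << "\n"; });
    auto [view, cb] = pool.acquire();
    std::cout << "acquired " << view << "\n";
    cb(0); // 0 standing in for HAILO_SUCCESS
}
```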
+ status = m_done_cbs.enqueue(exec_done, true); // we get here only if acquire_free_mem_view succeeded, so we want to push cb to keep sync between the queues CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -377,12 +318,12 @@ hailo_status BufferPool::enqueue_buffer(MemoryView mem_view, const TransferDoneC bool BufferPool::is_full() { - return (m_max_buffer_count - m_free_mem_views.size_approx() == 0); + return (m_max_buffer_count - num_of_buffers_in_pool() == 0); } size_t BufferPool::num_of_buffers_in_pool() { - return m_done_cbs.size_approx(); + return m_free_mem_views.size_approx(); } bool BufferPool::is_holding_user_buffers() @@ -390,57 +331,18 @@ bool BufferPool::is_holding_user_buffers() return m_is_holding_user_buffers; } -// This function changes the m_max_buffer_count to be num_of_buffers, and it must be called when pool is empty of buffers -hailo_status BufferPool::allocate_buffers(bool is_dma_able, size_t num_of_buffers) -{ - m_is_holding_user_buffers = false; - CHECK(m_free_mem_views.size_approx() == 0, HAILO_INTERNAL_FAILURE, "Cannot allocate buffers for pool, since pool is not empty!"); - m_max_buffer_count = num_of_buffers; - for (size_t i = 0; i < m_max_buffer_count; i++) { - BufferStorageParams buffer_storage_params; - if (is_dma_able) { - buffer_storage_params = BufferStorageParams::create_dma(); - } - auto buffer = Buffer::create(m_buffer_size, buffer_storage_params); - CHECK_EXPECTED_AS_STATUS(buffer); - - auto status = m_free_mem_views.enqueue(MemoryView(buffer.value())); - CHECK_SUCCESS(status); - m_buffers.emplace_back(buffer.release()); - } - return HAILO_SUCCESS; -} - Expected BufferPool::acquire_buffer(std::chrono::milliseconds timeout, bool ignore_shutdown_event) { - auto mem_view = acquire_free_mem_view(timeout, ignore_shutdown_event); - if ((HAILO_SUCCESS != mem_view.status()) && (m_is_holding_user_buffers)) { - auto done_cb = acquire_on_done_cb(timeout, true); - CHECK_EXPECTED(done_cb); - - done_cb.value()(mem_view.status()); - } - if (HAILO_SHUTDOWN_EVENT_SIGNALED == mem_view.status()) { - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_EXPECTED(mem_view); - - if (m_is_holding_user_buffers) { - auto done_cb = acquire_on_done_cb(timeout, true); - CHECK_EXPECTED(done_cb); + m_is_already_running = true; - return PipelineBuffer(mem_view.release(), done_cb.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency); - } - - return PipelineBuffer(mem_view.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency); -} - -Expected> BufferPool::acquire_buffer_ptr(std::chrono::milliseconds timeout) -{ - auto mem_view = acquire_free_mem_view(timeout); + std::unique_lock lock(m_dequeue_mutex); + auto mem_view = acquire_free_mem_view(timeout, ignore_shutdown_event); if ((HAILO_SUCCESS != mem_view.status()) && (m_is_holding_user_buffers)) { - auto done_cb = acquire_on_done_cb(timeout, true); + auto done_cb = acquire_on_done_cb(timeout, ignore_shutdown_event); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == done_cb.status()) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } CHECK_EXPECTED(done_cb); done_cb.value()(mem_view.status()); @@ -450,18 +352,17 @@ Expected> BufferPool::acquire_buffer_ptr(std::ch } CHECK_EXPECTED(mem_view); - std::shared_ptr ptr = nullptr; if (m_is_holding_user_buffers) { - auto done_cb = acquire_on_done_cb(timeout, true); + auto done_cb = acquire_on_done_cb(timeout, true); // we get here only if acquire_free_mem_view succeeded, so we want to pop cb to keep sync between the 
queues
+        if (HAILO_SHUTDOWN_EVENT_SIGNALED == done_cb.status()) {
+            return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
+        }
         CHECK_EXPECTED(done_cb);
-        ptr = make_shared_nothrow<PipelineBuffer>(mem_view.release(), done_cb.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency);
-    } else {
-        ptr = make_shared_nothrow<PipelineBuffer>(mem_view.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency);
+        return PipelineBuffer(mem_view.release(), done_cb.release(), HAILO_SUCCESS, m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency);
     }
-    CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY);
-    return ptr;
+    return PipelineBuffer(mem_view.release(), [](hailo_status){}, HAILO_SUCCESS, m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency);
 }
 
 Expected<MemoryView> BufferPool::acquire_free_mem_view(std::chrono::milliseconds timeout,
@@ -511,10 +412,13 @@ AccumulatorPtr BufferPool::get_queue_size_accumulator()
 
 Expected<PipelineBuffer> BufferPool::get_available_buffer(PipelineBuffer &&optional, std::chrono::milliseconds timeout)
 {
+    m_is_already_running = true;
+
     if (optional) {
-        CHECK_AS_EXPECTED(optional.size() == buffer_size(), HAILO_INVALID_OPERATION,
+        auto pool_buffer_size = buffer_size();
+        CHECK_AS_EXPECTED(optional.size() == pool_buffer_size, HAILO_INVALID_OPERATION,
             "Optional buffer size must be equal to pool buffer size. Optional buffer size = {}, buffer pool size = {}",
-            optional.size(), buffer_size());
+            optional.size(), pool_buffer_size);
         return std::move(optional);
     }
@@ -528,11 +432,31 @@
 hailo_status BufferPool::release_buffer(MemoryView mem_view)
 {
-    std::unique_lock<std::mutex> lock(m_release_buffer_mutex);
+    std::unique_lock<std::mutex> lock(m_enqueue_mutex);
     // This can be called after the shutdown event was signaled so we ignore it here
     return m_free_mem_views.enqueue(std::move(mem_view), true);
 }
 
+hailo_status BufferPool::map_to_vdevice(VDevice &vdevice, hailo_dma_buffer_direction_t direction)
+{
+    for (auto &buff : m_buffers) {
+        auto dma_mapped_buffer = DmaMappedBuffer::create(vdevice, buff.data(), buff.size(), direction);
+        CHECK_EXPECTED(dma_mapped_buffer);
+        m_dma_mapped_buffers.emplace_back(dma_mapped_buffer.release());
+    }
+    return HAILO_SUCCESS;
+}
+
+hailo_status BufferPool::set_buffer_size(uint32_t buffer_size)
+{
+    std::unique_lock<std::mutex> lock(m_buffer_size_mutex);
+    CHECK(!m_is_already_running, HAILO_INVALID_OPERATION,
+        "Setting the pool's buffer size after inference has started is not allowed");
+
+    m_buffer_size = buffer_size;
+    return HAILO_SUCCESS;
+}
+
 Expected<DurationCollector> DurationCollector::create(hailo_pipeline_elem_stats_flags_t flags,
     uint32_t num_frames_before_collection_start)
 {
@@ -629,7 +553,7 @@ const std::string &PipelineObject::name() const
 
 std::string PipelineObject::create_element_name(const std::string &element_name, const std::string &stream_name, uint8_t stream_index)
 {
     std::stringstream name;
-    name << element_name << static_cast<uint32_t>(stream_index) << "_" << stream_name;
+    name << element_name << static_cast<uint32_t>(stream_index) << stream_name;
     return name.str();
 }
 
@@ -719,11 +643,6 @@ hailo_status PipelinePad::dequeue_user_buffers(hailo_status error_status)
     return m_element.dequeue_user_buffers(error_status);
 }
 
-hailo_status PipelinePad::wait_for_finish()
-{
-    return m_element.wait_for_finish();
-}
-
 hailo_status PipelinePad::clear_abort()
 {
     return m_element.clear_abort();
@@ -873,22 +792,64 @@ std::string PipelineElement::description() const
     return element_description.str();
 }
 
-hailo_status
PipelineElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) +std::string PipelineElement::links_description() const +{ + std::stringstream element_base_description; + + element_base_description << "| inputs:"; + if ((!sinks().empty()) && (nullptr != sinks()[0].prev())) { + for(const auto &sink : sinks()) { + if (sink.prev()) { + element_base_description << " " << sink.prev()->element().name(); + } + } + } else { + element_base_description << " user"; + } + + element_base_description << " | outputs:"; + if ((!sources().empty()) && (nullptr != sources()[0].next())) { + for(const auto &source : sources()) { + if (source.next()) { + element_base_description << " " << source.next()->element().name(); + } + } + } else { + element_base_description << " user"; + } + + return element_base_description.str(); +} + +void PipelineElement::print_deep_description(std::vector &visited_elements) +{ + auto visited_node = find(visited_elements.begin(), visited_elements.end(), this->name()); + if (visited_elements.end() != visited_node) { + return; + } + + LOGGER__INFO("{} {}", this->name().c_str(), this->links_description().c_str()); + visited_elements.emplace_back(this->name()); + + for (auto &source : sources()) { + source.next()->element().print_deep_description(visited_elements); + } +} + +hailo_status PipelineElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) { (void)mem_view; (void)exec_done; - (void)source_name; LOGGER__ERROR("enqueue_execution_buffer is not implemented for {}!", name()); return HAILO_NOT_IMPLEMENTED; }; -hailo_status PipelineElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) -{ - return enqueue_execution_buffer(mem_view, exec_done, ""); -}; - hailo_status PipelineElement::empty_buffer_pool(BufferPoolPtr pool, hailo_status error_status, std::chrono::milliseconds timeout) { + if (!pool) { + return HAILO_SUCCESS; + } + if (!pool->is_holding_user_buffers()) { return HAILO_SUCCESS; } @@ -901,38 +862,17 @@ hailo_status PipelineElement::empty_buffer_pool(BufferPoolPtr pool, hailo_status return acquired_buffer.status(); } - auto exec_done_cb = acquired_buffer->get_exec_done_cb(); - exec_done_cb(error_status); + acquired_buffer->set_action_status(error_status); } return HAILO_SUCCESS; } -hailo_status PipelineElement::fill_buffer_pool(bool /*is_dma_able*/, size_t /*num_of_buffers*/, const uint32_t /*source_index*/) -{ - return HAILO_NOT_IMPLEMENTED; -} - -Expected PipelineElement::can_push_buffer_upstream(const uint32_t /*source_index*/) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -Expected PipelineElement::can_push_buffer_downstream(const uint32_t /*source_index*/) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -hailo_status PipelineElement::fill_buffer_pool(bool /*is_dma_able*/, size_t /*num_of_buffers*/, const std::string &/*source_name*/) -{ - return HAILO_NOT_IMPLEMENTED; -} - -Expected PipelineElement::can_push_buffer_upstream(const std::string &/*source_name*/) +Expected PipelineElement::can_push_buffer_upstream() { return make_unexpected(HAILO_NOT_IMPLEMENTED); } -Expected PipelineElement::can_push_buffer_downstream(const std::string &/*source_name*/) +Expected PipelineElement::can_push_buffer_downstream() { return make_unexpected(HAILO_NOT_IMPLEMENTED); } @@ -982,11 +922,6 @@ hailo_status PipelineElement::dequeue_user_buffers(hailo_status error_status) return 
execute_dequeue_user_buffers(error_status); } -hailo_status PipelineElement::wait_for_finish() -{ - return execute_wait_for_finish(); -} - hailo_status PipelineElement::execute_activate() { return execute([&](auto *pad){ return pad->activate(); }); @@ -1033,11 +968,6 @@ hailo_status PipelineElement::execute_dequeue_user_buffers(hailo_status error_st return execute([&](auto *pad){ return pad->dequeue_user_buffers(error_status); }); } -hailo_status PipelineElement::execute_wait_for_finish() -{ - return execute([&](auto *pad){ return pad->wait_for_finish(); }); -} - hailo_status PipelineElement::execute(std::function func) { for (auto pad : execution_pads()) { diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp index 8689a7ba..67d7d2d8 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp @@ -14,6 +14,7 @@ #include "hailo/expected.hpp" #include "hailo/hailort.h" #include "hailo/runtime_statistics.hpp" +#include "hailo/dma_mapped_buffer.hpp" #include "net_flow/ops/nms_post_process.hpp" #include "utils/thread_safe_queue.hpp" @@ -38,6 +39,7 @@ enum class BufferType UNINITIALIZED, VIEW, PIX_BUFFER, + DMA_BUFFER, }; using TransferDoneCallbackAsyncInfer = std::function; @@ -46,8 +48,7 @@ using PipelineTimePoint = std::chrono::steady_clock::time_point; #define BUFFER_POOL_DEFAULT_QUEUE_TIMEOUT (std::chrono::milliseconds(10000)) #define DEFAULT_NUM_FRAMES_BEFORE_COLLECTION_START (100) -#define NUMBER_OF_PLANES_NV12_NV21 (2) -#define NUMBER_OF_PLANES_I420 (3) +class VDevice; struct AdditionalData {}; @@ -110,14 +111,13 @@ class PipelineBuffer final }; // Creates an empty PipelineBuffer (with no buffer/memory view) - PipelineBuffer(); PipelineBuffer(Type type); - PipelineBuffer(hailo_status status, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); - PipelineBuffer(MemoryView view, bool is_user_buffer = true, BufferPoolPtr pool = nullptr, bool should_measure = false, hailo_status status = HAILO_SUCCESS); - PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done, - bool is_user_buffer = true, BufferPoolPtr pool = nullptr, bool should_measure = false, hailo_status status = HAILO_SUCCESS); - PipelineBuffer(hailo_pix_buffer_t buffer); - PipelineBuffer(hailo_pix_buffer_t buffer, const TransferDoneCallbackAsyncInfer &exec_done); + // TODO HRT-12185: remove the option to pass a lambda as a parameter and save it as a member since it increases the memory consumption Significantly + PipelineBuffer(hailo_status action_status = HAILO_SUCCESS, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); + PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}, + hailo_status action_status = HAILO_SUCCESS, bool is_user_buffer = true, BufferPoolPtr pool = nullptr, bool should_measure = false); + PipelineBuffer(hailo_pix_buffer_t buffer, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); + ~PipelineBuffer(); PipelineBuffer(const PipelineBuffer &) = delete; @@ -134,9 +134,9 @@ class PipelineBuffer final Metadata get_metadata() const; void set_metadata(Metadata &&val); void set_additional_data(std::shared_ptr data) { m_metadata.set_additional_data(data);} - TransferDoneCallbackAsyncInfer get_exec_done_cb(); hailo_status action_status(); void set_action_status(hailo_status status); + void call_exec_done(); private: Type m_type; @@ -166,23 +166,22 @@ class 
BufferPool : public std::enable_shared_from_this virtual ~BufferPool() = default; size_t buffer_size(); - hailo_status enqueue_buffer(MemoryView mem_view); - hailo_status enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done); - hailo_status allocate_buffers(bool is_dma_able, size_t num_of_buffers); + hailo_status enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); Expected acquire_buffer(std::chrono::milliseconds timeout, bool ignore_shutdown_event = false); - Expected> acquire_buffer_ptr(std::chrono::milliseconds timeout); AccumulatorPtr get_queue_size_accumulator(); Expected get_available_buffer(PipelineBuffer &&optional, std::chrono::milliseconds timeout); bool is_full(); size_t num_of_buffers_in_pool(); bool is_holding_user_buffers(); + hailo_status map_to_vdevice(VDevice &vdevice, hailo_dma_buffer_direction_t direction); + hailo_status set_buffer_size(uint32_t buffer_size); private: Expected acquire_free_mem_view(std::chrono::milliseconds timeout, bool ignore_shutdown_event = false); Expected acquire_on_done_cb(std::chrono::milliseconds timeout, bool ignore_shutdown_event = false); hailo_status release_buffer(MemoryView mem_view); - const size_t m_buffer_size; + std::atomic m_buffer_size; bool m_is_holding_user_buffers; size_t m_max_buffer_count; const bool m_measure_vstream_latency; @@ -192,10 +191,20 @@ class BufferPool : public std::enable_shared_from_this // So when the pool has allocated buffers, it will hold them in the vector and have pointers to them in the queue. // And when the pool holds user buffers, the vector will be empty and only the queue will hold the user's buffers. std::vector m_buffers; + + // When m_buffers is not empty, and we need to pre-map the buffers to the vdevice, this vector will hold reference + // to the mapping objects. 
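For context on map_to_vdevice() in pipeline.cpp: DmaMappedBuffer pins and maps a host buffer for device DMA once, so per-frame transfers skip the map/unmap cost, and the mapping lives exactly as long as the DmaMappedBuffer object, which is why the pool keeps the mapping objects in the vector below. A hedged usage sketch against the public HailoRT API (error handling trimmed; the free function is ours, not part of the library):

```cpp
#include "hailo/hailort.hpp"
#include "hailo/dma_mapped_buffer.hpp"

using namespace hailort;

// Pre-map one pool buffer for host-to-device DMA, as map_to_vdevice() does
// for every buffer in the pool.
Expected<DmaMappedBuffer> premap_h2d(VDevice &vdevice, Buffer &pool_buffer)
{
    auto mapped = DmaMappedBuffer::create(vdevice, pool_buffer.data(),
        pool_buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
    if (!mapped) {
        return make_unexpected(mapped.status());
    }
    // The caller must keep the returned object alive for as long as the
    // buffer is used for transfers; destroying it unmaps the buffer.
    return mapped.release();
}
```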
+    std::vector<DmaMappedBuffer> m_dma_mapped_buffers;
+
     SpscQueue<MemoryView> m_free_mem_views;
     SpscQueue<TransferDoneCallbackAsyncInfer> m_done_cbs;
     AccumulatorPtr m_queue_size_accumulator;
-    std::mutex m_release_buffer_mutex;
+    // We have separate enqueue and dequeue mutexes to allow MPMC access
+    std::mutex m_enqueue_mutex;
+    std::mutex m_dequeue_mutex;
+    std::mutex m_buffer_size_mutex;
+
+    std::atomic<bool> m_is_already_running;
 
     friend class PipelineBuffer;
 };
@@ -292,7 +301,6 @@ class PipelinePad final : public PipelineObject
     hailo_status abort();
     hailo_status terminate(hailo_status error_status);
     hailo_status dequeue_user_buffers(hailo_status error_status);
-    hailo_status wait_for_finish();
     hailo_status clear_abort();
     virtual hailo_status run_push(PipelineBuffer &&buffer);
     void run_push_async(PipelineBuffer &&buffer);
@@ -345,7 +353,6 @@ class PipelineElement : public PipelineObject
     hailo_status terminate(hailo_status error_status);
     hailo_status dequeue_user_buffers(hailo_status error_status);
     hailo_status clear_abort();
-    hailo_status wait_for_finish();
     AccumulatorPtr get_fps_accumulator();
     AccumulatorPtr get_latency_accumulator();
     bool is_terminating_element();
@@ -355,16 +362,13 @@ class PipelineElement : public PipelineObject
     const std::vector<PipelinePad> &sinks() const;
     const std::vector<PipelinePad> &sources() const;
     virtual std::string description() const;
+    std::string links_description() const;
+    void print_deep_description(std::vector<std::string> &visited_elements);
 
-    virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name);
-    hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done);
+    virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done);
     hailo_status empty_buffer_pool(BufferPoolPtr pool, hailo_status error_status, std::chrono::milliseconds timeout);
-    virtual Expected<bool> can_push_buffer_upstream(const uint32_t source_index = UINT32_MAX);
-    virtual Expected<bool> can_push_buffer_downstream(const uint32_t source_index = UINT32_MAX);
-    virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index = UINT32_MAX);
-    virtual Expected<bool> can_push_buffer_upstream(const std::string &source_name = "");
-    virtual Expected<bool> can_push_buffer_downstream(const std::string &source_name = "");
-    virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name = "");
+    virtual Expected<bool> can_push_buffer_upstream();
+    virtual Expected<bool> can_push_buffer_downstream();
     virtual Expected<uint32_t> get_source_index_from_source_name(const std::string &/*source_name*/)
     {
         // This function is overridden in multi-source elements
@@ -383,6 +387,16 @@
         return HAILO_INVALID_OPERATION;
     }
 
+    virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t /*max_accumulated_mask_size*/) {
+        return HAILO_INVALID_OPERATION;
+    }
+
+    virtual BufferPoolPtr get_buffer_pool() const
+    {
+        // This method should be overridden by elements with local pools
+        return nullptr;
+    }
+
 protected:
     DurationCollector m_duration_collector;
     std::shared_ptr<std::atomic<hailo_status>> m_pipeline_status;
@@ -405,7 +419,6 @@
     virtual hailo_status execute_terminate(hailo_status error_status);
     virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status);
     virtual hailo_status execute_clear_abort();
-    virtual hailo_status execute_wait_for_finish();
 
     virtual hailo_status execute(std::function<hailo_status(PipelinePad*)>);
 diff --git
a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp index 084ea3a1..6c945fbb 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp @@ -8,15 +8,13 @@ **/ #include "net_flow/pipeline/pipeline_internal.hpp" #include "net_flow/pipeline/async_infer_runner.hpp" -#include "common/os_utils.hpp" -#include "common/runtime_statistics_internal.hpp" namespace hailort { PipelineElementInternal::PipelineElementInternal(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), m_async_pipeline(async_pipeline) {} @@ -33,64 +31,9 @@ void PipelineElementInternal::handle_non_recoverable_async_error(hailo_status er } } -SourceElement::SourceElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) -{ - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); -} - -PipelinePad &SourceElement::source() -{ - return m_sources[0]; -} - -std::vector SourceElement::execution_pads() -{ - std::vector result{&source()}; - return result; -} - -SinkElement::SinkElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) -{ - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); -} - -PipelinePad &SinkElement::sink() -{ - return m_sinks[0]; -} - -std::vector SinkElement::execution_pads() -{ - std::vector result{&sink()}; - return result; -} - -hailo_status SinkElement::execute_terminate(hailo_status /*error_status*/) -{ - return HAILO_SUCCESS; -} - -hailo_status SinkElement::execute_dequeue_user_buffers(hailo_status /*error_status*/) -{ - return HAILO_SUCCESS; -} - -hailo_status AsyncPushQueueElement::execute_dequeue_user_buffers(hailo_status error_status) -{ - auto status = m_queue.clear(); - CHECK_SUCCESS(PipelineElement::execute_dequeue_user_buffers(error_status)); - return status; -} - IntermediateElement::IntermediateElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) { m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); @@ -103,1467 +46,4 @@ std::vector IntermediateElement::execution_pads() return result; } -FilterElement::FilterElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, BufferPoolPtr buffer_pool, - std::chrono::milliseconds timeout, 
std::shared_ptr async_pipeline) : - IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_pool(buffer_pool), - m_timeout(timeout) -{} - -hailo_status FilterElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - auto output = action(std::move(buffer), PipelineBuffer()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - return output.status(); - } - CHECK_EXPECTED_AS_STATUS(output); - - hailo_status status = next_pad().run_push(output.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("run_push of {} was shutdown!", name()); - return status; - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -void FilterElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - assert(m_pipeline_direction == PipelineDirection::PUSH); - if (HAILO_SUCCESS != buffer.action_status()) { - auto buffer_from_pool = m_pool->get_available_buffer(PipelineBuffer(), m_timeout); - if (HAILO_SUCCESS != buffer_from_pool.status()) { - handle_non_recoverable_async_error(buffer_from_pool.status()); - } else { - buffer_from_pool->set_action_status(buffer.action_status()); - - auto exec_done_cb = buffer.get_exec_done_cb(); - exec_done_cb(buffer.action_status()); - - next_pad().run_push_async(buffer_from_pool.release()); - } - return; - } - - auto output = action(std::move(buffer), PipelineBuffer()); - if (HAILO_SUCCESS == output.status()) { - next_pad().run_push_async(output.release()); - } else { - next_pad().run_push_async(PipelineBuffer(output.status())); - } - return; -} - -Expected FilterElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ - auto buffer = next_pad().run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("run_pull in FilterElement was shutdown!"); - return make_unexpected(buffer.status()); - } - CHECK_EXPECTED(buffer); - return action(buffer.release(), std::move(optional)); -} - -std::vector FilterElement::get_queue_size_accumulators() -{ - if (nullptr == m_pool || nullptr == m_pool->get_queue_size_accumulator()) { - return std::vector(); - } - return {m_pool->get_queue_size_accumulator()}; -} - -hailo_status FilterElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - - auto status = m_pool->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status FilterElement::execute_dequeue_user_buffers(hailo_status error_status) -{ - auto status = empty_buffer_pool(m_pool, error_status, m_timeout); - CHECK_SUCCESS(status); - return PipelineElement::execute_dequeue_user_buffers(error_status); -} - -Expected FilterElement::can_push_buffer_upstream(const uint32_t /*source_index*/) -{ - return !m_pool->is_full(); -} - -hailo_status FilterElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/) -{ - auto status = m_pool->allocate_buffers(is_dma_able, num_of_buffers); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected FilterElement::can_push_buffer_upstream(const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED(source_index); - return can_push_buffer_upstream(*source_index); -} - -hailo_status 
FilterElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED_AS_STATUS(source_index); - return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); -} - -Expected> BaseQueueElement::create_queue(size_t queue_size, EventPtr shutdown_event) -{ - auto queue = SpscQueue::create(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - return queue.release(); -} - -BaseQueueElement::BaseQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_queue(std::move(queue)), - m_shutdown_event(shutdown_event), - m_timeout(timeout), - m_is_thread_running(true), - m_activation_event(std::move(activation_event)), - m_deactivation_event(std::move(deactivation_event)), - m_queue_size_accumulator(std::move(queue_size_accumulator)), - m_is_run_in_thread_running(false) -{} - -BaseQueueElement::~BaseQueueElement() -{ - LOGGER__INFO("Queue element {} has {} frames in his Queue on destruction", name(), m_queue.size_approx()); -} - -void BaseQueueElement::start_thread() -{ - m_thread = std::thread([this] () { - OsUtils::set_current_thread_name(thread_name()); - while (m_is_thread_running.load()) { - auto status = m_activation_event.wait(INIFINITE_TIMEOUT()); - - if (!m_is_thread_running) { - LOGGER__INFO("Thread in element {} is not running anymore, exiting..", this->name()); - break; - } - if (HAILO_SUCCESS == status) { - { - std::unique_lock lock(m_mutex); - m_is_run_in_thread_running = true; - } - m_cv.notify_all(); - - status = run_in_thread(); - - { - std::unique_lock lock(m_mutex); - m_is_run_in_thread_running = false; - } - m_cv.notify_all(); - } - - if (HAILO_SUCCESS != status) { - if (HAILO_SHUTDOWN_EVENT_SIGNALED != status) { - // We do not want to log error for HAILO_STREAM_ABORTED_BY_USER - if (HAILO_STREAM_ABORTED_BY_USER != status) { - LOGGER__ERROR("Queue element {} run in thread function failed! status = {}", this->name(), status); - } - - // Store the real error in pipeline_status - m_pipeline_status->store(status); - - // Signal other threads to stop - hailo_status shutdown_status = m_shutdown_event->signal(); - if (HAILO_SUCCESS != shutdown_status) { - LOGGER__CRITICAL("Failed shutting down queue with status {}", shutdown_status); - } - } - //Thread has done its execution. 
Mark to the thread to wait for activation again - hailo_status event_status = m_activation_event.reset(); - if (HAILO_SUCCESS != event_status) { - LOGGER__CRITICAL("Failed reset activation event of element {}, with status {}", this->name(), event_status); - } - - // Mark to deactivation function that the thread is done - event_status = m_deactivation_event.signal(); - if (HAILO_SUCCESS != event_status) { - LOGGER__CRITICAL("Failed signaling deactivation event of element {}, with status {}", this->name(), event_status); - } - } - } - }); -} - -void BaseQueueElement::stop_thread() -{ - m_shutdown_event->signal(); - - // Mark thread as not running, then wake it in case it is waiting on m_activation_event - m_is_thread_running = false; - m_activation_event.signal(); - - if (m_thread.joinable()) { - m_thread.join(); - } -} - -std::vector BaseQueueElement::get_queue_size_accumulators() -{ - if (nullptr == m_queue_size_accumulator) { - return std::vector(); - } - return {m_queue_size_accumulator}; -} - -hailo_status BaseQueueElement::execute_activate() -{ - hailo_status status = PipelineElementInternal::execute_activate(); - CHECK_SUCCESS(status); - - status = m_activation_event.signal(); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort) -{ - hailo_status status = m_deactivation_event.wait(INIFINITE_TIMEOUT()); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to post_deactivate() in {} with status {}", name(), status); - } - - status = m_deactivation_event.reset(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to reset of deactivation event in {} with status {}", name(), status); - } - - return PipelineElementInternal::execute_post_deactivate(should_clear_abort); -} - -hailo_status BaseQueueElement::execute_clear() -{ - auto status = PipelineElementInternal::execute_clear(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status); - } - - auto queue_status = m_queue.clear(); - CHECK_SUCCESS(queue_status, "Failed to clear() queue in {} with status {}", name(), status); - - return status; -} - -hailo_status BaseQueueElement::execute_wait_for_finish() -{ - std::unique_lock lock(m_mutex); - m_cv.wait(lock, [this] () { - return !m_is_run_in_thread_running; - }); - return HAILO_SUCCESS; -} - -hailo_status BaseQueueElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - return m_sinks[0].prev()->element().enqueue_execution_buffer(mem_view, exec_done, m_sinks[0].prev()->name()); -} - -Expected BaseQueueElement::can_push_buffer_upstream(const uint32_t source_index) -{ - return m_sinks[0].prev()->element().can_push_buffer_upstream(source_index); -} - -Expected BaseQueueElement::can_push_buffer_downstream(const uint32_t /*source_index*/) -{ - return !m_queue.is_queue_full(); -} - -hailo_status BaseQueueElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) -{ - return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, source_index); -} - -Expected BaseQueueElement::can_push_buffer_upstream(const std::string &source_name) -{ - return m_sinks[0].prev()->element().can_push_buffer_upstream(source_name); -} - -Expected BaseQueueElement::can_push_buffer_downstream(const std::string &/*source_name*/) -{ - return !m_queue.is_queue_full(); -} - -hailo_status 
BaseQueueElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) -{ - return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, source_name); -} - -hailo_status PushQueueElement::execute_abort() -{ - auto status = m_shutdown_event->reset(); - CHECK_SUCCESS(status); - - m_pipeline_status->store(HAILO_STREAM_ABORTED_BY_USER); - - status = PipelineElementInternal::execute_abort(); - CHECK_SUCCESS(status); - - status = m_activation_event.signal(); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseQueueElement::execute_clear_abort() -{ - auto status = m_shutdown_event->reset(); - CHECK_SUCCESS(status); - - m_pipeline_status->store(HAILO_SUCCESS); - return PipelineElementInternal::execute_clear_abort(); -} - -hailo_status BaseQueueElement::set_timeout(std::chrono::milliseconds timeout) -{ - m_timeout = timeout; - return HAILO_SUCCESS; -} - -std::string BaseQueueElement::description() const -{ - std::stringstream element_description; - - element_description << "(" << this->name(); - if (HAILO_INFINITE != this->m_timeout.count()) { - element_description << " | timeout: " << std::chrono::duration_cast(this->m_timeout).count() << "s"; - } - element_description << ")"; - - return element_description.str(); -} - -hailo_status BaseQueueElement::pipeline_status() -{ - auto status = m_pipeline_status->load(); - - // We treat HAILO_STREAM_ABORTED_BY_USER as success because it is caused by user action (aborting streams) - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return HAILO_SUCCESS; - } - return status; -} - -Expected> PushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> PushQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) -{ - return PushQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, - 
vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction, async_pipeline); -} - -PushQueueElement::PushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline, bool should_start_thread) : - BaseQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction, async_pipeline) -{ - if (should_start_thread) { - start_thread(); - } -} - -PushQueueElement::~PushQueueElement() -{ - stop_thread(); -} - -hailo_status PushQueueElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - auto status = m_pipeline_status->load(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(m_pipeline_status->load()); - status = m_queue.enqueue(std::move(buffer), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - auto queue_thread_status = pipeline_status(); - CHECK_SUCCESS(queue_thread_status, - "Shutdown event was signaled in enqueue of queue element {} because thread has failed with status={}!", name(), - queue_thread_status); - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - return HAILO_SUCCESS; -} - -void PushQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) { - LOGGER__ERROR("run_push_async is not supported for {}", name()); - assert(false); -} - -Expected PushQueueElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) -{ - return make_unexpected(HAILO_INVALID_OPERATION); -} - -hailo_status PushQueueElement::execute_deactivate() -{ - // Mark to the threads that deactivate() was called. - hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); - if (HAILO_SUCCESS != status) { - // We want to deactivate source even if enqueue failed - auto deactivation_status = PipelineElementInternal::execute_deactivate(); - CHECK_SUCCESS(deactivation_status); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { - LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); - } - else { - LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); - return status; - } - } - - return HAILO_SUCCESS; -} - -PipelinePad &PushQueueElement::next_pad() -{ - // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed) - return *m_sources[0].next(); -} - -hailo_status PushQueueElement::run_in_thread() -{ - auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - // Return if deactivated - if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) { - hailo_status status = m_shutdown_event->signal(); - CHECK_SUCCESS(status); - - status = next_pad().deactivate(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status); - } - - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - - hailo_status status = next_pad().run_push(buffer.release()); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected> AsyncPushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> AsyncPushQueueElement::create(const std::string &name, const ElementBuildParams &build_params, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction) -{ - return AsyncPushQueueElement::create(name, build_params.timeout, build_params.buffer_pool_size_edges, - build_params.elem_stats_flags, build_params.shutdown_event, build_params.pipeline_status, async_pipeline, pipeline_direction); -} - -AsyncPushQueueElement::AsyncPushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PushQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), 
std::move(activation_event), std::move(deactivation_event), pipeline_direction, async_pipeline, false) -{ - start_thread(); -} - -void AsyncPushQueueElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - - auto status = m_queue.enqueue(std::move(buffer), m_timeout); - if (HAILO_SUCCESS != status && HAILO_SHUTDOWN_EVENT_SIGNALED != status) { - handle_non_recoverable_async_error(status); - stop_thread(); - } -} - -void AsyncPushQueueElement::start_thread() -{ - m_thread = std::thread([this] () { - OsUtils::set_current_thread_name(thread_name()); - while (m_is_thread_running.load()) { - auto status = m_pipeline_status->load(); - if (HAILO_SUCCESS != status) { - LOGGER__INFO("Thread in element {} is not running anymore, exiting..", name()); - m_is_thread_running = false; - break; - } - - status = run_in_thread(); - if (HAILO_SUCCESS != status) { - handle_non_recoverable_async_error(status); - m_is_thread_running = false; - break; - } - } - }); -} - -hailo_status AsyncPushQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -hailo_status AsyncPushQueueElement::run_in_thread() -{ - auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT()); - auto buffer_status = buffer.status(); - switch (buffer_status) { - case HAILO_SHUTDOWN_EVENT_SIGNALED: - break; - - case HAILO_SUCCESS: - // Return if deactivated - if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) { - hailo_status status = m_shutdown_event->signal(); - CHECK_SUCCESS(status); - - status = next_pad().deactivate(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status); - } - - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - - next_pad().run_push_async(buffer.release()); - break; - - default: - next_pad().run_push_async(PipelineBuffer(buffer_status)); - } - - return buffer_status; -} - -hailo_status AsyncPushQueueElement::execute_deactivate() -{ - // Mark to the threads that deactivate() was called. 
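The enqueue just below pushes a PipelineBuffer of type DEACTIVATE through the queue as a sentinel: because the queue is FIFO, the worker thread first drains every frame enqueued before the sentinel, and exits its loop only when it dequeues the sentinel itself, so nothing in flight is dropped on deactivation. A stripped-down sketch of the same pattern, using hypothetical Item/Worker types rather than the HailoRT classes:

```cpp
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

// Hypothetical queue item: either a data payload or a Deactivate sentinel.
struct Item {
    enum class Type { Data, Deactivate } type;
    int payload;
};

class Worker {
public:
    Worker() : m_thread([this] { run(); }) {}

    // Destructor enqueues the sentinel and joins; everything pushed before
    // the sentinel is still processed first (FIFO ordering).
    ~Worker() {
        push({Item::Type::Deactivate, 0});
        m_thread.join();
    }

    void push(Item item) {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_queue.push(item);
        }
        m_cv.notify_one();
    }

private:
    void run() {
        for (;;) {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_cv.wait(lock, [&] { return !m_queue.empty(); });
            Item item = m_queue.front();
            m_queue.pop();
            lock.unlock();
            if (item.type == Item::Type::Deactivate) {
                return; // sentinel observed: queue already drained, exit thread
            }
            std::cout << "processing " << item.payload << '\n';
        }
    }

    std::queue<Item> m_queue;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    std::thread m_thread; // declared last: members above exist before the thread starts
};

int main() {
    Worker w;
    w.push({Item::Type::Data, 1});
    w.push({Item::Type::Data, 2});
    // ~Worker pushes the Deactivate sentinel and joins the thread.
}
```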
- hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); - if (HAILO_SUCCESS != status) { - // We want to deactivate source even if enqueue failed - auto deactivation_status = PipelineElementInternal::execute_deactivate(); - CHECK_SUCCESS(deactivation_status); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { - LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); - } else { - LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); - return status; - } - } - - return HAILO_SUCCESS; -} - -hailo_status AsyncPushQueueElement::execute_post_deactivate(bool should_clear_abort) -{ - // We marked thread to stop with PipelineBuffer::Type::DEACTIVATE, now we wait for it to finish - stop_thread(); - return PipelineElementInternal::execute_post_deactivate(should_clear_abort); -} - -hailo_status AsyncPushQueueElement::execute_terminate(hailo_status error_status) -{ - if (m_is_terminated) { - return HAILO_SUCCESS; - } - - auto terminate_status = PipelineElement::execute_terminate(error_status); - - if ((!next_pad().element().is_terminating_element())) { - stop_thread(); - } - - CHECK_SUCCESS(terminate_status); - - return HAILO_SUCCESS; -} - -Expected> PullQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PullQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} -Expected> PullQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - return PullQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); -} - -PullQueueElement::PullQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, 
PipelineDirection pipeline_direction) : - BaseQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction, nullptr) -{ - start_thread(); -} - -PullQueueElement::~PullQueueElement() -{ - stop_thread(); -} - -hailo_status PullQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -void PullQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - LOGGER__ERROR("run_push_async is not supported for {}", name()); - assert(false); -} - -Expected PullQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*sink*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in queue element!"); - - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - auto output = m_queue.dequeue(m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - auto queue_thread_status = pipeline_status(); - CHECK_SUCCESS_AS_EXPECTED(queue_thread_status, - "Shutdown event was signaled in dequeue of queue element {} because thread has failed with status={}!", name(), - queue_thread_status); - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_EXPECTED(output); - - return output; -} - -hailo_status PullQueueElement::execute_deactivate() -{ - hailo_status status = PipelineElementInternal::execute_deactivate(); - auto shutdown_event_status = m_shutdown_event->signal(); - CHECK_SUCCESS(status); - CHECK_SUCCESS(shutdown_event_status); - - return HAILO_SUCCESS; -} - -PipelinePad &PullQueueElement::next_pad() -{ - // Note: The next elem to be run is upstream from this elem (i.e. 
buffers are pulled) - return *m_sinks[0].prev(); -} - -hailo_status PullQueueElement::run_in_thread() -{ - auto buffer = next_pad().run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in run_pull of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { - LOGGER__INFO("run_pull of queue element {} was aborted!", name()); - return HAILO_STREAM_ABORTED_BY_USER; - } - if (HAILO_NETWORK_GROUP_NOT_ACTIVATED == buffer.status()) { - LOGGER__INFO("run_pull of queue element {} was called before network_group is activated!", name()); - return HAILO_NETWORK_GROUP_NOT_ACTIVATED; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - hailo_status status = m_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected> UserBufferQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction) -{ - auto pending_buffer_queue = BaseQueueElement::create_queue(1, shutdown_event); - CHECK_EXPECTED(pending_buffer_queue); - - auto full_buffer_queue = BaseQueueElement::create_queue(1, shutdown_event); - CHECK_EXPECTED(full_buffer_queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(pending_buffer_queue.release(), - full_buffer_queue.release(), shutdown_event, name, timeout, duration_collector.release(), - std::move(queue_size_accumulator), std::move(pipeline_status), activation_event.release(), - deactivation_event.release(), pipeline_direction); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating UserBufferQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> UserBufferQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - return UserBufferQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); -} - -UserBufferQueueElement::UserBufferQueueElement(SpscQueue &&queue, SpscQueue &&full_buffer_queue, - EventPtr shutdown_event, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, - PipelineDirection 
pipeline_direction) : - PullQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), - std::move(queue_size_accumulator), std::move(pipeline_status), std::move(activation_event), - std::move(deactivation_event), - pipeline_direction), - m_full_buffer_queue(std::move(full_buffer_queue)) -{} - -Expected UserBufferQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be valid in {}!", name()); - - hailo_status status = m_queue.enqueue(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_SUCCESS_AS_EXPECTED(status); - - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_full_buffer_queue.size_approx())); - } - auto output = m_full_buffer_queue.dequeue(m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", - name(), HAILO_TIMEOUT, m_timeout.count()); - CHECK_EXPECTED(output); - - CHECK_AS_EXPECTED(output->data() == optional.data(), HAILO_INTERNAL_FAILURE, "The buffer received in {} was not the same as the user buffer!", name()); - return output; -} - -hailo_status UserBufferQueueElement::execute_clear() -{ - auto status = PipelineElementInternal::execute_clear(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status); - } - - auto queue_clear_status = m_full_buffer_queue.clear(); - if (HAILO_SUCCESS != queue_clear_status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status); - status = queue_clear_status; - } - - queue_clear_status = m_queue.clear(); - if (HAILO_SUCCESS != queue_clear_status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status); - status = queue_clear_status; - } - - return status; -} - -hailo_status UserBufferQueueElement::run_in_thread() -{ - auto optional = m_queue.dequeue(INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == optional.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_EXPECTED_AS_STATUS(optional); - - auto buffer = next_pad().run_pull(optional.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in run_pull of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { - LOGGER__INFO("run_pull of {} was aborted!", name()); - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - hailo_status status = m_full_buffer_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -BaseMuxElement::BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, - 
DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_timeout(timeout), - m_pool(buffer_pool) -{ - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); - m_sinks.reserve(sink_count); - m_sink_has_arrived.reserve(sink_count); - for (uint32_t i = 0; i < sink_count; ++i) { - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - m_index_of_sink[m_sinks[i].name()] = i; - m_sink_has_arrived[m_sinks[i].name()] = false; - } -} - -std::vector BaseMuxElement::execution_pads() -{ - if (m_next_pads.size() == 0) { - if (PipelineDirection::PUSH == m_pipeline_direction) { - m_next_pads.reserve(m_sources.size()); - for (auto &source : m_sources ) { - m_next_pads.push_back(source.next()); - } - } else { - m_next_pads.reserve(m_sinks.size()); - for (auto &sink : m_sinks ) { - m_next_pads.push_back(sink.prev()); - } - } - } - return m_next_pads; -} - -hailo_status BaseMuxElement::execute_terminate(hailo_status error_status) -{ - if (m_is_terminated) { - return HAILO_SUCCESS; - } - - auto terminate_status = PipelineElement::execute_terminate(error_status); - - if (!m_is_terminating_element) { - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. - std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - } - - CHECK_SUCCESS(terminate_status); - - return HAILO_SUCCESS; -} - - -hailo_status BaseMuxElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -void BaseMuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) -{ - assert(PipelineDirection::PUSH == m_pipeline_direction); - assert(m_next_pads.size() == 1); - - std::unique_lock lock(m_mutex); - - m_sink_has_arrived[sink.name()] = true; - m_input_buffers[sink.name()] = std::move(buffer); - if (has_all_sinks_arrived()) { - hailo_status all_buffers_status = HAILO_SUCCESS; - for (auto &input_buffer : m_input_buffers) { - if (HAILO_SUCCESS != input_buffer.second.action_status()) { - all_buffers_status = input_buffer.second.action_status(); - break; // error from one buffer is enough - } - } - - if (HAILO_SUCCESS != all_buffers_status) { - auto acquired_buffer = m_pool->get_available_buffer(PipelineBuffer(), m_timeout); - if (HAILO_SUCCESS == acquired_buffer.status()) { - acquired_buffer->set_action_status(all_buffers_status); - - auto exec_done_cb = m_input_buffers[sink.name()].get_exec_done_cb(); - exec_done_cb(m_input_buffers[sink.name()].action_status()); - - m_next_pads[0]->run_push_async(acquired_buffer.release()); - } else { - handle_non_recoverable_async_error(acquired_buffer.status()); - } - } else { - std::vector input_buffers; - input_buffers.resize(m_input_buffers.size()); - for (auto &input_buffer : m_input_buffers) { - input_buffers[m_index_of_sink[input_buffer.first]] = std::move(input_buffer.second); - } - - auto output = action(std::move(input_buffers), PipelineBuffer()); - if (HAILO_SUCCESS == output.status()) { - m_next_pads[0]->run_push_async(output.release()); - } else { - 
m_next_pads[0]->run_push_async(PipelineBuffer(output.status())); - } - } - - for (const auto &curr_sink : m_sinks) { - m_sink_has_arrived[curr_sink.name()] = false; - } - m_input_buffers.clear(); - - // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again - lock.unlock(); - m_cv.notify_all(); - } else { - auto done = m_cv.wait_for(lock, m_timeout, [&](){ - if (m_pipeline_status->load() != HAILO_SUCCESS) { - return true; // so we can exit this flow - } - return !m_sink_has_arrived[sink.name()]; - }); - - if (!done) { - LOGGER__ERROR("Waiting for other threads in AsyncHwElement {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); - handle_non_recoverable_async_error(HAILO_TIMEOUT); - } - - if (m_pipeline_status->load() == HAILO_STREAM_ABORTED_BY_USER) { - lock.unlock(); - m_cv.notify_all(); - } - } -} - -bool BaseMuxElement::has_all_sinks_arrived() -{ - for (const auto &current_sink : m_sink_has_arrived) { - if (!current_sink.second) { - return false; - } - } - return true; -} -Expected BaseMuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "PostInferElement {} does not support run_pull operation", name()); - std::vector inputs; - inputs.reserve(m_sinks.size()); - for (auto &sink : m_sinks) { - auto buffer = sink.prev()->run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } - CHECK_EXPECTED(buffer); - - inputs.push_back(buffer.release()); - } - - auto output = action(std::move(inputs), std::move(optional)); - CHECK_EXPECTED(output); - - return output; -} - -hailo_status BaseMuxElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - auto status = m_pool->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseMuxElement::execute_dequeue_user_buffers(hailo_status error_status) -{ - auto status = empty_buffer_pool(m_pool, error_status, m_timeout); - CHECK_SUCCESS(status); - return PipelineElement::execute_dequeue_user_buffers(error_status); -} - -Expected BaseMuxElement::can_push_buffer_upstream(const uint32_t /*source_index*/) -{ - return !m_pool->is_full(); -} - -hailo_status BaseMuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/) -{ - auto status = m_pool->allocate_buffers(is_dma_able, num_of_buffers); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected BaseMuxElement::can_push_buffer_upstream(const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED(source_index); - return can_push_buffer_upstream(*source_index); -} - -hailo_status BaseMuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED_AS_STATUS(source_index); - return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); -} - -BaseDemuxElement::BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::vector pools, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PipelineElementInternal(name,
std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_timeout(timeout), - m_pools(pools), - m_is_activated(false), - m_was_stream_aborted(false), - m_source_name_to_index(), - m_was_source_called(source_count, false), - m_buffers_for_action() -{ - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - m_sources.reserve(source_count); - for (uint32_t i = 0; i < source_count; i++) { - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); - m_source_name_to_index[m_sources[i].name()] = i; - } -} - -hailo_status BaseDemuxElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "BaseDemuxElement {} does not support run_push operation", name()); - - auto outputs = action(std::move(buffer)); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { - return outputs.status(); - } - CHECK_EXPECTED_AS_STATUS(outputs); - - for (const auto &pad : execution_pads()) { - assert(m_source_name_to_index.count(pad->prev()->name()) > 0); - auto source_index = m_source_name_to_index[pad->prev()->name()]; - auto status = pad->run_push(std::move(outputs.value()[source_index])); - - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("run_push of {} was shutdown!", name()); - return status; - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -void BaseDemuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - assert(PipelineDirection::PUSH == m_pipeline_direction); - if (HAILO_SUCCESS != buffer.action_status()) { - for (const auto &pad : execution_pads()) { - auto source_index = m_source_name_to_index[pad->prev()->name()]; - auto acquired_buffer = m_pools[source_index]->acquire_buffer(m_timeout); - if (HAILO_SUCCESS == acquired_buffer.status()) { - acquired_buffer->set_action_status(buffer.action_status()); - - auto exec_done_cb = buffer.get_exec_done_cb(); - exec_done_cb(buffer.action_status()); - - pad->run_push_async(acquired_buffer.release()); - } else { - handle_non_recoverable_async_error(acquired_buffer.status()); - } - } - return; - } - - auto outputs = action(std::move(buffer)); - - for (const auto &pad : execution_pads()) { - assert(m_source_name_to_index.count(pad->prev()->name()) > 0); - auto source_index = m_source_name_to_index[pad->prev()->name()]; - if (HAILO_SUCCESS == outputs.status()) { - pad->run_push_async(std::move(outputs.value()[source_index])); - } else { - pad->run_push_async(PipelineBuffer(outputs.status())); - } - } -} - -Expected BaseDemuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) -{ - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "BaseDemuxElement {} does not support run_pull operation", name()); - - CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in demux element!"); - - std::unique_lock lock(m_mutex); - if (!m_is_activated) { - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - - if (m_was_stream_aborted) { - return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); - } - - m_was_source_called[m_source_name_to_index[source.name()]] = true; - - if (were_all_srcs_arrived()) { - // If all srcs arrived, execute the demux - auto input = execution_pads()[0]->run_pull(); - if (HAILO_STREAM_ABORTED_BY_USER == input.status()) { - 
LOGGER__INFO("run_pull of demux element was aborted!"); - m_was_stream_aborted = true; - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(input.status()); - } - if (HAILO_SHUTDOWN_EVENT_SIGNALED == input.status()) { - return make_unexpected(input.status()); - } - CHECK_EXPECTED(input); - - auto outputs = action(input.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { - return make_unexpected(outputs.status()); - } - CHECK_EXPECTED(outputs); - - m_buffers_for_action = outputs.release(); - - for (uint32_t i = 0; i < m_was_source_called.size(); i++) { - m_was_source_called[i] = false; - } - - // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again - lock.unlock(); - m_cv.notify_all(); - } else { - // If not all srcs arrived, wait until m_was_source_called is false (set to false after the demux execution) - auto wait_successful = m_cv.wait_for(lock, m_timeout, [&](){ - return !m_was_source_called[m_source_name_to_index[source.name()]] || m_was_stream_aborted || !m_is_activated; - }); - CHECK_AS_EXPECTED(wait_successful, HAILO_TIMEOUT, "Waiting for other threads in demux {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); - - if (m_was_stream_aborted) { - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); - } - - // We check if the element is not activated in case notify_all() was called from deactivate() - if (!m_is_activated) { - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - } - - assert(m_source_name_to_index[source.name()] < m_buffers_for_action.size()); - return std::move(m_buffers_for_action[m_source_name_to_index[source.name()]]); -} - -bool BaseDemuxElement::were_all_srcs_arrived() -{ - return std::all_of(m_was_source_called.begin(), m_was_source_called.end(), [](bool v) { return v; }); -} - -hailo_status BaseDemuxElement::execute_activate() -{ - if (m_is_activated) { - return HAILO_SUCCESS; - } - m_is_activated = true;// TODO Should this always be true, no matter the status of source().activate()? - m_was_stream_aborted = false; - - return PipelineElementInternal::execute_activate(); -} - -hailo_status BaseDemuxElement::execute_deactivate() -{ - if (!m_is_activated) { - return HAILO_SUCCESS; - } - m_is_activated = false; - - // deactivate should be called before mutex acquire and notify_all because it is possible that all queues are waiting on - // the run_pull of the source (HwRead) and the mutex is already acquired so this would prevent a timeout error - hailo_status status = PipelineElementInternal::execute_deactivate(); - - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. 
- std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::execute_post_deactivate(bool should_clear_abort) -{ - for (uint32_t i = 0; i < m_was_source_called.size(); i++) { - m_was_source_called[i] = false; - } - return PipelineElementInternal::execute_post_deactivate(should_clear_abort); -} - -hailo_status BaseDemuxElement::execute_abort() -{ - auto status = PipelineElementInternal::execute_abort(); - CHECK_SUCCESS(status); - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. - std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::set_timeout(std::chrono::milliseconds timeout) -{ - m_timeout = timeout; - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - auto pool_id = m_source_name_to_index.at(source_name); - auto status = m_pools[pool_id]->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::execute_dequeue_user_buffers(hailo_status error_status) -{ - for (auto &pool : m_pools) { - auto status = empty_buffer_pool(pool, error_status, m_timeout); - CHECK_SUCCESS(status); - } - return PipelineElement::execute_dequeue_user_buffers(error_status);; -} - -Expected BaseDemuxElement::can_push_buffer_upstream(const uint32_t source_index) -{ - CHECK_AS_EXPECTED(source_index < m_pools.size(), HAILO_INTERNAL_FAILURE); - return !m_pools[source_index]->is_full(); -} - -hailo_status BaseDemuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) -{ - CHECK(source_index < m_pools.size(), HAILO_INTERNAL_FAILURE); - CHECK_SUCCESS(m_pools[source_index]->allocate_buffers(is_dma_able, num_of_buffers)); - return HAILO_SUCCESS; -} - -Expected BaseDemuxElement::can_push_buffer_upstream(const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED(source_index); - return can_push_buffer_upstream(*source_index); -} - -hailo_status BaseDemuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED_AS_STATUS(source_index); - return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); -} - -Expected BaseDemuxElement::get_source_index_from_source_name(const std::string &source_name) -{ - CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND); - auto ret_val = m_source_name_to_index.at(source_name); - return ret_val; -} - -std::vector BaseDemuxElement::execution_pads() -{ - if (m_next_pads.size() == 0) - { - if (PipelineDirection::PUSH == m_pipeline_direction) { - m_next_pads.reserve(m_sources.size()); - for (auto &source : m_sources ) { - m_next_pads.push_back(source.next()); - } - } else { - m_next_pads.reserve(m_sinks.size()); - for (auto &sink : m_sinks ) { - m_next_pads.push_back(sink.prev()); - } - } - } - return m_next_pads; -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp 
b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp index c699d74d..c904a59d 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp @@ -12,6 +12,8 @@ #include "net_flow/pipeline/pipeline.hpp" +#include "common/barrier.hpp" + namespace hailort { @@ -38,40 +40,12 @@ class PipelineElementInternal : public PipelineElement protected: void handle_non_recoverable_async_error(hailo_status error_status); + std::weak_ptr m_async_pipeline; friend class PipelinePad; }; - -// An element with one source pad only (generates data) -class SourceElement : public PipelineElementInternal -{ -public: - SourceElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - PipelinePad &source(); - -protected: - virtual std::vector execution_pads() override; -}; - -// An element with one sink pad only (consumes data) -class SinkElement : public PipelineElementInternal -{ -public: - SinkElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - PipelinePad &sink(); - -protected: - virtual std::vector execution_pads() override; - virtual hailo_status execute_terminate(hailo_status error_status) override; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; -}; - // Transfers data from one pad to another pad. Has one sink pad and one source pad. class IntermediateElement : public PipelineElementInternal { @@ -85,283 +59,6 @@ class IntermediateElement : public PipelineElementInternal virtual std::vector execution_pads() override; }; -class FilterElement : public IntermediateElement -{ -public: - FilterElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - std::shared_ptr async_pipeline); - virtual ~FilterElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - virtual std::vector get_queue_size_accumulators() override; - -protected: - // The optional buffer functions as an output buffer that the user can write to instead of acquiring a new buffer - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) = 0; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - - BufferPoolPtr m_pool; - std::chrono::milliseconds m_timeout; -}; - -class BaseQueueElement : public IntermediateElement -{ -public: - virtual ~BaseQueueElement(); 
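The BaseQueueElement declaration being removed here documents, a few lines further down, that start_thread()/stop_thread() must be called from the subclass's ctor and dtor because run_in_thread() is pure virtual: a thread launched from the base constructor could invoke it before the derived object's vtable is in place, producing a pure virtual call. A hypothetical sketch of the hazard and the fix (illustrative names, not the removed classes themselves):

```cpp
#include <thread>

class BaseWorker {
public:
    virtual ~BaseWorker() = default;

protected:
    // If this were called from BaseWorker's own constructor, the new thread
    // could reach run_in_thread() while the object is still a BaseWorker,
    // i.e. before the derived vtable exists: a pure virtual call.
    void start_thread() { m_thread = std::thread([this] { run_in_thread(); }); }
    void stop_thread() {
        if (m_thread.joinable()) {
            m_thread.join();
        }
    }
    virtual void run_in_thread() = 0;

private:
    std::thread m_thread;
};

class DerivedWorker : public BaseWorker {
public:
    DerivedWorker() { start_thread(); }  // safe: vtable fully set up here
    ~DerivedWorker() { stop_thread(); }  // join before the derived part dies

private:
    void run_in_thread() override { /* consume the queue */ }
};

int main() {
    DerivedWorker w; // thread runs once and is joined in ~DerivedWorker
}
```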
- - hailo_status set_timeout(std::chrono::milliseconds timeout); - virtual std::string description() const override; - - static constexpr auto INIFINITE_TIMEOUT() { return std::chrono::milliseconds(HAILO_INFINITE); } - -protected: - static Expected> create_queue(size_t queue_size, EventPtr shutdown_event); - BaseQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - - hailo_status pipeline_status(); - - virtual hailo_status execute_activate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_clear() override; - virtual hailo_status execute_clear_abort() override; - virtual hailo_status execute_wait_for_finish() override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual Expected can_push_buffer_downstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual Expected can_push_buffer_downstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - /// Starts/stops the queue thread. This functions needs to be called on subclasses ctor and dtor - /// accordingly because otherwise, if we will start/stop thread in this class we will face pure-call - /// to `run_in_thread`. 
- /// This functions don't return status because they are meant to be called on ctor and dtor - virtual void start_thread(); - virtual void stop_thread(); - - virtual std::vector get_queue_size_accumulators() override; - - virtual hailo_status run_in_thread() = 0; - virtual std::string thread_name() = 0; - - SpscQueue m_queue; - EventPtr m_shutdown_event; - std::chrono::milliseconds m_timeout; - std::thread m_thread; - std::atomic_bool m_is_thread_running; - Event m_activation_event; - Event m_deactivation_event; - AccumulatorPtr m_queue_size_accumulator; - std::atomic_bool m_is_run_in_thread_running; - std::condition_variable m_cv; - std::mutex m_mutex; -}; - -class PushQueueElement : public BaseQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PUSH, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH, std::shared_ptr async_pipeline = nullptr); - PushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, bool should_start_thread = true); - virtual ~PushQueueElement(); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - -protected: - virtual hailo_status execute_deactivate() override; - virtual hailo_status run_in_thread() override; - virtual std::string thread_name() override { return "PUSH_QUEUE"; }; - virtual hailo_status execute_abort() override; -}; - -class AsyncPushQueueElement : public PushQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline, - PipelineDirection pipeline_direction = PipelineDirection::PUSH); - static Expected> create(const std::string &name, const ElementBuildParams &build_params, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction); - AsyncPushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - -protected: - virtual hailo_status run_in_thread() 
override; - virtual std::string thread_name() override { return "ASYNC_PUSH_Q"; }; - virtual void start_thread() override; - virtual hailo_status execute_terminate(hailo_status error_status); - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_deactivate() override; -}; - -class PullQueueElement : public BaseQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - PullQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction); - virtual ~PullQueueElement(); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - -protected: - virtual hailo_status execute_deactivate() override; - virtual hailo_status run_in_thread() override; - virtual std::string thread_name() override { return "PULL_QUEUE"; }; -}; - -class UserBufferQueueElement : public PullQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - UserBufferQueueElement(SpscQueue &&queue, SpscQueue &&full_buffer_queue, EventPtr shutdown_event, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction); - - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - -protected: - virtual hailo_status execute_clear() override; - virtual hailo_status run_in_thread() override; - -private: - SpscQueue m_full_buffer_queue; -}; - -class BaseMuxElement : public PipelineElementInternal -{ -public: - virtual ~BaseMuxElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const 
uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - -protected: - BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual hailo_status execute_terminate(hailo_status error_status) override; - virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) = 0; - virtual std::vector execution_pads() override; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - - std::chrono::milliseconds m_timeout; - BufferPoolPtr m_pool; - -private: - bool has_all_sinks_arrived(); - std::unordered_map m_sink_has_arrived; - std::mutex m_mutex; - std::unordered_map m_index_of_sink; - std::unordered_map m_input_buffers; - std::vector m_next_pads; - std::condition_variable m_cv; -}; - -class BaseDemuxElement : public PipelineElementInternal -{ -public: - virtual ~BaseDemuxElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - hailo_status set_timeout(std::chrono::milliseconds timeout); - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - virtual Expected get_source_index_from_source_name(const std::string &source_name) override; - -protected: - BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::vector pools, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual hailo_status execute_activate() override; - virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_abort() override; - virtual Expected> action(PipelineBuffer &&input) = 0; - virtual std::vector execution_pads() override; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - - std::chrono::milliseconds m_timeout; - std::vector m_pools; - -private: - bool were_all_srcs_arrived(); - - std::atomic_bool m_is_activated; - std::atomic_bool m_was_stream_aborted; - std::unordered_map m_source_name_to_index; - std::vector m_was_source_called; - std::vector m_buffers_for_action; - std::mutex m_mutex; - std::condition_variable m_cv; - std::vector m_next_pads; -}; - enum class AccumulatorType { FPS, diff --git 
a/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp
new file mode 100644
index 00000000..3527fde9
--- /dev/null
+++ b/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp
@@ -0,0 +1,842 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file queue_elements.cpp
+ * @brief Implementation of the queue elements
+ **/
+
+#include "net_flow/pipeline/vstream_internal.hpp"
+#include "net_flow/pipeline/queue_elements.hpp"
+#include "common/os_utils.hpp"
+#include "common/runtime_statistics_internal.hpp"
+
+namespace hailort
+{
+
+Expected<SpscQueue<PipelineBuffer>> BaseQueueElement::create_queue(size_t queue_size, EventPtr shutdown_event)
+{
+    auto queue = SpscQueue<PipelineBuffer>::create(queue_size, shutdown_event);
+    CHECK_EXPECTED(queue);
+
+    return queue.release();
+}
+
+BaseQueueElement::BaseQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name,
+    std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, Event &&activation_event, Event &&deactivation_event,
+    PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline) :
+    IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline),
+    m_queue(std::move(queue)),
+    m_shutdown_event(shutdown_event),
+    m_timeout(timeout),
+    m_is_thread_running(true),
+    m_activation_event(std::move(activation_event)),
+    m_deactivation_event(std::move(deactivation_event)),
+    m_queue_size_accumulator(std::move(queue_size_accumulator)),
+    m_pool(buffer_pool)
+{}
+
+BaseQueueElement::~BaseQueueElement()
+{
+    LOGGER__INFO("Queue element {} has {} frames in its queue on destruction", name(), m_queue.size_approx());
+}
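`start_thread()` below implements an event-driven worker: the thread parks on an activation event, runs `run_in_thread()` once activated, and on failure stores the error, signals shutdown, and re-arms itself for the next activation. A condensed, self-contained sketch of that protocol, with `std::condition_variable` standing in for hailort's `Event` (an assumption made for illustration):

```cpp
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

class WorkerSketch {
public:
    void start() {
        m_thread = std::thread([this] {
            while (m_running) {
                wait_for_activation();          // parks until activate() or stop()
                if (!m_running) {
                    break;
                }
                while (m_running && step()) {}  // ~ the run_in_thread() loop
                deactivate();                   // re-arm: wait for the next activation
            }
        });
    }

    void activate() {
        { std::lock_guard<std::mutex> lock(m_mutex); m_activated = true; }
        m_cv.notify_one();
    }

    void stop() {
        m_running = false;
        activate();                             // wake the thread so it can exit
        if (m_thread.joinable()) {
            m_thread.join();
        }
    }

private:
    bool step() { return false; }               // one unit of work; false = stop
    void wait_for_activation() {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return m_activated || !m_running; });
    }
    void deactivate() {
        std::lock_guard<std::mutex> lock(m_mutex);
        m_activated = false;
    }

    std::thread m_thread;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    std::atomic_bool m_running{true};
    bool m_activated = false;
};
```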
+void BaseQueueElement::start_thread()
+{
+    m_thread = std::thread([this] () {
+        OsUtils::set_current_thread_name(thread_name());
+        while (m_is_thread_running.load()) {
+            auto status = m_activation_event.wait(INIFINITE_TIMEOUT());
+
+            if (!m_is_thread_running) {
+                LOGGER__INFO("Thread in element {} is not running anymore, exiting...", this->name());
+                break;
+            }
+            if (HAILO_SUCCESS == status) {
+                status = run_in_thread();
+            }
+
+            if (HAILO_SUCCESS != status) {
+                if (HAILO_SHUTDOWN_EVENT_SIGNALED != status) {
+                    // We do not want to log an error for HAILO_STREAM_ABORT
+                    if (HAILO_STREAM_ABORT != status) {
+                        LOGGER__ERROR("Queue element {} run in thread function failed! status = {}", this->name(), status);
+                    }
+
+                    // Store the real error in pipeline_status
+                    m_pipeline_status->store(status);
+                }
+                // Signal other threads to stop
+                hailo_status shutdown_status = m_shutdown_event->signal();
+                if (HAILO_SUCCESS != shutdown_status) {
+                    LOGGER__CRITICAL("Failed shutting down queue with status {}", shutdown_status);
+                }
+
+                // Thread has done its execution. Mark the thread to wait for activation again
+                hailo_status event_status = m_activation_event.reset();
+                if (HAILO_SUCCESS != event_status) {
+                    LOGGER__CRITICAL("Failed to reset activation event of element {}, with status {}", this->name(), event_status);
+                }
+
+                // Mark to the deactivation function that the thread is done
+                event_status = m_deactivation_event.signal();
+                if (HAILO_SUCCESS != event_status) {
+                    LOGGER__CRITICAL("Failed signaling deactivation event of element {}, with status {}", this->name(), event_status);
+                }
+            }
+        }
+    });
+}
+
+void BaseQueueElement::stop_thread()
+{
+    m_shutdown_event->signal();
+
+    // Mark thread as not running, then wake it in case it is waiting on m_activation_event
+    m_is_thread_running = false;
+    m_activation_event.signal();
+
+    if (m_thread.joinable()) {
+        m_thread.join();
+    }
+}
+
+std::vector<AccumulatorPtr> BaseQueueElement::get_queue_size_accumulators()
+{
+    if (nullptr == m_queue_size_accumulator) {
+        return std::vector<AccumulatorPtr>();
+    }
+    return {m_queue_size_accumulator};
+}
+
+hailo_status BaseQueueElement::execute_activate()
+{
+    auto status = m_shutdown_event->reset();
+    CHECK_SUCCESS(status);
+
+    status = PipelineElementInternal::execute_activate();
+    CHECK_SUCCESS(status);
+
+    status = m_deactivation_event.reset();
+    if (HAILO_SUCCESS != status) {
+        LOGGER__ERROR("Failed to reset deactivation event in {} with status {}", name(), status);
+    }
+
+    status = m_activation_event.signal();
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort)
+{
+    hailo_status status = m_deactivation_event.wait(INIFINITE_TIMEOUT());
+    if (HAILO_SUCCESS != status) {
+        LOGGER__ERROR("Failed to post_deactivate() in {} with status {}", name(), status);
+    }
+
+    return PipelineElementInternal::execute_post_deactivate(should_clear_abort);
+}
+
+hailo_status BaseQueueElement::execute_clear()
+{
+    auto status = PipelineElementInternal::execute_clear();
+    if (HAILO_SUCCESS != status) {
+        LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status);
+    }
+
+    auto queue_clear_status = m_queue.clear();
+    if (HAILO_SUCCESS != queue_clear_status) {
+        LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status);
+        status = queue_clear_status;
+    }
+
+    auto pool_clear_status = empty_buffer_pool(m_pool, HAILO_SUCCESS, BUFFER_POOL_DEFAULT_QUEUE_TIMEOUT);
+    if (HAILO_SUCCESS != pool_clear_status) {
+        LOGGER__ERROR("Failed to clear() in {} with status {}", name(), pool_clear_status);
+        status = pool_clear_status;
+    }
+
+    return status;
+}
+
+hailo_status PushQueueElement::execute_abort()
+{
+    auto status = m_shutdown_event->reset();
+    CHECK_SUCCESS(status);
+
+    m_pipeline_status->store(HAILO_STREAM_ABORT);
+
+    status = PipelineElementInternal::execute_abort();
+    CHECK_SUCCESS(status);
+
+    status = m_activation_event.signal();
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status BaseQueueElement::execute_clear_abort()
+{
+    auto status = m_shutdown_event->reset();
+    CHECK_SUCCESS(status);
+
+    m_pipeline_status->store(HAILO_SUCCESS);
+    return PipelineElementInternal::execute_clear_abort();
+}
+
+hailo_status BaseQueueElement::set_timeout(std::chrono::milliseconds timeout)
+{
+    m_timeout = timeout;
+    return HAILO_SUCCESS;
+}
+
+std::string BaseQueueElement::description() const
+{
+    std::stringstream element_description;
+
+    element_description << "(" << this->name();
+    if (HAILO_INFINITE != this->m_timeout.count()) {
+        element_description << " | timeout: " << std::chrono::duration_cast<std::chrono::seconds>(this->m_timeout).count() << "s";
+    }
+    element_description << ")";
+
+    return element_description.str();
+}
+hailo_status BaseQueueElement::pipeline_status()
+{
+    auto status = m_pipeline_status->load();
+
+    // We treat HAILO_STREAM_ABORT as success because it is caused by user action (aborting streams)
+    if (HAILO_STREAM_ABORT == status) {
+        return HAILO_SUCCESS;
+    }
+    return status;
+}
+
+Expected<std::shared_ptr<PushQueueElement>> PushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout,
+    size_t queue_size, size_t frame_size, hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vs_flags,
+    std::shared_ptr<std::atomic<hailo_status>> pipeline_status, std::shared_ptr<AsyncPipeline> async_pipeline)
+{
+    auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled);
+    CHECK_EXPECTED(shutdown_event_exp);
+    auto shutdown_event = shutdown_event_exp.release();
+
+    auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event);
+    CHECK_EXPECTED(queue);
+
+    auto activation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(activation_event);
+
+    auto deactivation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(deactivation_event);
+
+    // We do not measure duration for Q elements
+    auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE);
+    CHECK_EXPECTED(duration_collector);
+
+    AccumulatorPtr queue_size_accumulator = nullptr;
+    if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) {
+        queue_size_accumulator = make_shared_nothrow<FullAccumulator<double>>("queue_size");
+        CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY);
+    }
+
+    auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, flags, vs_flags);
+    CHECK_EXPECTED(buffer_pool);
+
+    auto queue_ptr = make_shared_nothrow<PushQueueElement>(queue.release(), buffer_pool.release(), shutdown_event, name, timeout,
+        duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status),
+        activation_event.release(), deactivation_event.release(), async_pipeline, true);
+    CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name);
+
+    LOGGER__INFO("Created {}", queue_ptr->description());
+
+    return queue_ptr;
+}
+
+Expected<std::shared_ptr<PushQueueElement>> PushQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params,
+    size_t frame_size, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+    std::shared_ptr<AsyncPipeline> async_pipeline)
+{
+    return PushQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size,
+        frame_size, vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags,
+        pipeline_status, async_pipeline);
+}
+
+PushQueueElement::PushQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name,
+    std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, Event &&activation_event, Event &&deactivation_event,
+    std::shared_ptr<AsyncPipeline> async_pipeline, bool should_start_thread) :
+    BaseQueueElement(std::move(queue), buffer_pool, shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator),
+        std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), PipelineDirection::PUSH, async_pipeline)
+{
+    if (should_start_thread) {
+        start_thread();
+    }
+}
+
+PushQueueElement::~PushQueueElement()
+{
+    stop_thread();
+}
+
+hailo_status
PushQueueElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + auto status = m_pipeline_status->load(); + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + CHECK_SUCCESS(m_pipeline_status->load()); + + if (nullptr != m_queue_size_accumulator) { + m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); + } + status = m_queue.enqueue(std::move(buffer), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + auto queue_thread_status = pipeline_status(); + CHECK_SUCCESS(queue_thread_status, + "Shutdown event was signaled in enqueue of queue element {} because thread has failed with status={}!", name(), + queue_thread_status); + LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_SUCCESS(status); + return HAILO_SUCCESS; +} + +void PushQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) { + LOGGER__ERROR("run_push_async is not supported for {}", name()); + assert(false); +} + +Expected PushQueueElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) +{ + return make_unexpected(HAILO_INVALID_OPERATION); +} + +hailo_status PushQueueElement::execute_deactivate() +{ + // Mark to the threads that deactivate() was called. + hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); + if (HAILO_SUCCESS != status) { + // We want to deactivate source even if enqueue failed + auto deactivation_status = PipelineElementInternal::execute_deactivate(); + CHECK_SUCCESS(deactivation_status); + if ((HAILO_STREAM_ABORT == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { + LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); + } + else { + LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); + return status; + } + } + + return HAILO_SUCCESS; +} + +PipelinePad &PushQueueElement::next_pad() +{ + // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed)
+    return *m_sources[0].next();
+}
+
+hailo_status PushQueueElement::run_in_thread()
+{
+    auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT());
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
+        LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name());
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+    CHECK_EXPECTED_AS_STATUS(buffer);
+
+    // Return if deactivated
+    if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) {
+        hailo_status status = m_shutdown_event->signal();
+        CHECK_SUCCESS(status);
+
+        status = next_pad().deactivate();
+        if (HAILO_SUCCESS != status) {
+            LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status);
+        }
+
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+
+    hailo_status status = next_pad().run_push(buffer.release());
+    if (HAILO_STREAM_ABORT == status) {
+        LOGGER__INFO("run_push of {} was aborted!", name());
+        return status;
+    }
+    else if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+        LOGGER__INFO("run_push of {} stopped because Shutdown event was signaled!", name());
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+Expected<std::shared_ptr<AsyncPushQueueElement>> AsyncPushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout,
+    size_t queue_size, size_t frame_size, bool is_empty, bool interacts_with_hw, hailo_pipeline_elem_stats_flags_t flags,
+    hailo_vstream_stats_flags_t vstream_stats_flags, EventPtr shutdown_event,
+    std::shared_ptr<std::atomic<hailo_status>> pipeline_status, std::shared_ptr<AsyncPipeline> async_pipeline, bool is_entry)
+{
+    if (is_entry) {
+        // Multiplying by 2 to ensure dual-buffering when the edge element is the bottleneck
+        queue_size = queue_size * 2;
+    }
+
+    auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event);
+    CHECK_EXPECTED(queue);
+
+    auto activation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(activation_event);
+
+    auto deactivation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(deactivation_event);
+
+    // We do not measure duration for Q elements
+    auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE);
+    CHECK_EXPECTED(duration_collector);
+
+    AccumulatorPtr queue_size_accumulator = nullptr;
+    if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) {
+        queue_size_accumulator = make_shared_nothrow<FullAccumulator<double>>("queue_size");
+        CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY);
+    }
+
+    auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, flags, vstream_stats_flags, is_empty, interacts_with_hw);
+    CHECK_EXPECTED(buffer_pool);
+
+    auto queue_ptr = make_shared_nothrow<AsyncPushQueueElement>(queue.release(), buffer_pool.release(),
+        shutdown_event, name, timeout, duration_collector.release(), std::move(queue_size_accumulator),
+        std::move(pipeline_status), activation_event.release(), deactivation_event.release(), async_pipeline);
+    CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating AsyncPushQueueElement {} failed!", name);
+
+    LOGGER__INFO("Created {}", queue_ptr->description());
+
+    return queue_ptr;
+}
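Both `create()` overloads hand the same shutdown event to the queue and the buffer pool, so a single signal unblocks every waiter in the element. A rough sketch of the queue semantics the code above relies on, where a blocking dequeue can end in "item", "shutdown", or "timeout" (illustrative names; the real `SpscQueue<PipelineBuffer>` is a single-producer single-consumer queue tied to hailort's `EventPtr`):

```cpp
#include <chrono>
#include <condition_variable>
#include <deque>
#include <mutex>

enum class DequeueResult { Item, Shutdown, Timeout };

template <typename T>
class InterruptibleQueueSketch {
public:
    explicit InterruptibleQueueSketch(size_t capacity) : m_capacity(capacity) {}

    // Returns false if the queue stayed full until the timeout or was shut down,
    // loosely mirroring HAILO_TIMEOUT / HAILO_SHUTDOWN_EVENT_SIGNALED.
    bool enqueue(T item, std::chrono::milliseconds timeout) {
        std::unique_lock<std::mutex> lock(m_mutex);
        if (!m_cv.wait_for(lock, timeout, [this] { return m_shutdown || (m_items.size() < m_capacity); })) {
            return false;
        }
        if (m_shutdown) {
            return false;
        }
        m_items.push_back(std::move(item));
        m_cv.notify_all();
        return true;
    }

    DequeueResult dequeue(T &out, std::chrono::milliseconds timeout) {
        std::unique_lock<std::mutex> lock(m_mutex);
        if (!m_cv.wait_for(lock, timeout, [this] { return m_shutdown || !m_items.empty(); })) {
            return DequeueResult::Timeout;
        }
        if (m_shutdown) {
            return DequeueResult::Shutdown; // one signal releases all waiters
        }
        out = std::move(m_items.front());
        m_items.pop_front();
        m_cv.notify_all();
        return DequeueResult::Item;
    }

    void shutdown() {
        std::lock_guard<std::mutex> lock(m_mutex);
        m_shutdown = true;
        m_cv.notify_all();
    }

private:
    size_t m_capacity;
    std::deque<T> m_items;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    bool m_shutdown = false;
};
```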
+Expected<std::shared_ptr<AsyncPushQueueElement>> AsyncPushQueueElement::create(const std::string &name, const ElementBuildParams &build_params,
+    size_t frame_size, bool is_empty, bool interacts_with_hw, std::shared_ptr<AsyncPipeline> async_pipeline, bool is_entry)
+{
+    // Pools that interact with HW should be as big as the edge pools (user buffers)
+    auto queue_size = (interacts_with_hw) ? build_params.buffer_pool_size_edges : build_params.buffer_pool_size_internal;
+    return AsyncPushQueueElement::create(name, build_params.timeout, queue_size, frame_size, is_empty, interacts_with_hw,
+        build_params.elem_stats_flags, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, async_pipeline,
+        is_entry);
+}
+
+AsyncPushQueueElement::AsyncPushQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event,
+    const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, Event &&activation_event, Event &&deactivation_event,
+    std::shared_ptr<AsyncPipeline> async_pipeline) :
+    PushQueueElement(std::move(queue), buffer_pool, shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator),
+        std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), async_pipeline, false)
+{
+    start_thread();
+}
+
+void AsyncPushQueueElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
+{
+    // We do not measure duration for Q elements
+    if (nullptr != m_queue_size_accumulator) {
+        m_queue_size_accumulator->add_data_point(static_cast<double>(m_queue.size_approx()));
+    }
+
+    auto status = m_queue.enqueue(std::move(buffer), m_timeout);
+    if (HAILO_SUCCESS != status && HAILO_SHUTDOWN_EVENT_SIGNALED != status) {
+        handle_non_recoverable_async_error(status);
+        stop_thread();
+    }
+}
+
+void AsyncPushQueueElement::start_thread()
+{
+    m_thread = std::thread([this] () {
+        OsUtils::set_current_thread_name(thread_name());
+        while (m_is_thread_running.load()) {
+            auto status = m_pipeline_status->load();
+            if (HAILO_SUCCESS != status) {
+                LOGGER__INFO("Thread in element {} is not running anymore, exiting...", name());
+                m_is_thread_running = false;
+                break;
+            }
+
+            status = run_in_thread();
+            if (HAILO_SUCCESS != status) {
+                handle_non_recoverable_async_error(status);
+                m_is_thread_running = false;
+                break;
+            }
+        }
+    });
+}
+
+hailo_status AsyncPushQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
+{
+    return HAILO_INVALID_OPERATION;
+}
+
+hailo_status AsyncPushQueueElement::run_in_thread()
+{
+    auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT());
+    auto buffer_status = buffer.status();
+    switch (buffer_status) {
+    case HAILO_SHUTDOWN_EVENT_SIGNALED:
+        break;
+
+    case HAILO_SUCCESS:
+        // Return if deactivated
+        if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) {
+            hailo_status status = m_shutdown_event->signal();
+            CHECK_SUCCESS(status);
+
+            status = next_pad().deactivate();
+            if (HAILO_SUCCESS != status) {
+                LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status);
+            }
+
+            return HAILO_SHUTDOWN_EVENT_SIGNALED;
+        }
+
+        next_pad().run_push_async(buffer.release());
+        break;
+
+    default:
+        next_pad().run_push_async(PipelineBuffer(buffer_status));
+    }
+
+    return buffer_status;
+}
+
+hailo_status AsyncPushQueueElement::execute_deactivate()
+{
+    // Mark to the threads that deactivate() was called.
+ hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); + if (HAILO_SUCCESS != status) { + // We want to deactivate source even if enqueue failed + auto deactivation_status = PipelineElementInternal::execute_deactivate(); + CHECK_SUCCESS(deactivation_status); + if ((HAILO_STREAM_ABORT == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { + LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); + } else { + LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); + return status; + } + } + + return HAILO_SUCCESS; +} + +hailo_status AsyncPushQueueElement::execute_post_deactivate(bool should_clear_abort) +{ + // We marked thread to stop with PipelineBuffer::Type::DEACTIVATE, now we wait for it to finish + stop_thread(); + return PipelineElementInternal::execute_post_deactivate(should_clear_abort); +} + +hailo_status AsyncPushQueueElement::execute_terminate(hailo_status error_status) +{ + if (m_is_terminated) { + return HAILO_SUCCESS; + } + + auto terminate_status = PipelineElement::execute_terminate(error_status); + + if ((!next_pad().element().is_terminating_element())) { + stop_thread(); + } + + CHECK_SUCCESS(terminate_status); + + return HAILO_SUCCESS; +} + +hailo_status AsyncPushQueueElement::execute_dequeue_user_buffers(hailo_status error_status) +{ + auto dequeue_status = PipelineElement::execute_dequeue_user_buffers(error_status); + auto clear_queues_status = m_queue.clear(); + auto empty_pool_status = empty_buffer_pool(m_pool, error_status, m_timeout); + + CHECK_SUCCESS(dequeue_status); + CHECK_SUCCESS(clear_queues_status); + CHECK_SUCCESS(empty_pool_status); + return HAILO_SUCCESS; +} + +Expected AsyncPushQueueElement::can_push_buffer_downstream() +{ + return !m_queue.is_queue_full(); +} + +Expected> PullQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, size_t frame_size, hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vstream_stats_flags, + std::shared_ptr> pipeline_status) +{ + auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); + CHECK_EXPECTED(shutdown_event_exp); + auto shutdown_event = shutdown_event_exp.release(); + + auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); + CHECK_EXPECTED(queue); + + auto activation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(activation_event); + + auto deactivation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(deactivation_event); + + // We do not measure duration for Q elements + auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector); + + AccumulatorPtr queue_size_accumulator = nullptr; + if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { + queue_size_accumulator = make_shared_nothrow>("queue_size"); + CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); + } + + auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, flags, vstream_stats_flags); + CHECK_EXPECTED(buffer_pool); + + auto queue_ptr = make_shared_nothrow(queue.release(), buffer_pool.release(), shutdown_event, + name, timeout, duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), + activation_event.release(), deactivation_event.release()); + CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PullQueueElement {} failed!", name); + + 
LOGGER__INFO("Created {}", queue_ptr->description()); + + return queue_ptr; +} +Expected> PullQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, + size_t frame_size, std::shared_ptr> pipeline_status) +{ + return PullQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.queue_size, frame_size, vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags, + pipeline_status); +} + +PullQueueElement::PullQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, + const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event) : + BaseQueueElement(std::move(queue), buffer_pool, shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), + std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), PipelineDirection::PULL, nullptr) +{ + start_thread(); +} + +PullQueueElement::~PullQueueElement() +{ + stop_thread(); +} + +hailo_status PullQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +void PullQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + LOGGER__ERROR("run_push_async is not supported for {}", name()); + assert(false); +} + +Expected PullQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*sink*/) +{ + // We do not measure duration for Q elements + CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in queue element!"); + + auto output = m_queue.dequeue(m_timeout); + + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + auto queue_thread_status = pipeline_status(); + CHECK_SUCCESS_AS_EXPECTED(queue_thread_status, + "Shutdown event was signaled in dequeue of queue element {} because thread has failed with status={}!", name(), + queue_thread_status); + LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + CHECK_EXPECTED(output); + + return output; +} + +hailo_status PullQueueElement::execute_deactivate() +{ + hailo_status status = PipelineElementInternal::execute_deactivate(); + auto shutdown_event_status = m_shutdown_event->signal(); + CHECK_SUCCESS(status); + CHECK_SUCCESS(shutdown_event_status); + + return HAILO_SUCCESS; +} + +PipelinePad &PullQueueElement::next_pad() +{ + // Note: The next elem to be run is upstream from this elem (i.e. 
buffers are pulled)
+    return *m_sinks[0].prev();
+}
+
+hailo_status PullQueueElement::run_in_thread()
+{
+    auto buffer = next_pad().run_pull();
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
+        LOGGER__INFO("Shutdown event was signaled in run_pull of queue element {}!", name());
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+    if (HAILO_STREAM_ABORT == buffer.status()) {
+        LOGGER__INFO("run_pull of queue element {} was aborted!", name());
+        return HAILO_STREAM_ABORT;
+    }
+    if (HAILO_NETWORK_GROUP_NOT_ACTIVATED == buffer.status()) {
+        LOGGER__INFO("run_pull of queue element {} was called before network_group is activated!", name());
+        return HAILO_NETWORK_GROUP_NOT_ACTIVATED;
+    }
+    CHECK_EXPECTED_AS_STATUS(buffer);
+
+    if (nullptr != m_queue_size_accumulator) {
+        m_queue_size_accumulator->add_data_point(static_cast<double>(m_queue.size_approx()));
+    }
+
+    hailo_status status = m_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT());
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+        LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name());
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+Expected<std::shared_ptr<UserBufferQueueElement>> UserBufferQueueElement::create(const std::string &name, std::chrono::milliseconds timeout,
+    hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vstream_stats_flags, size_t frame_size,
+    std::shared_ptr<std::atomic<hailo_status>> pipeline_status)
+{
+    auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled);
+    CHECK_EXPECTED(shutdown_event_exp);
+    auto shutdown_event = shutdown_event_exp.release();
+
+    const auto queue_size = 1;
+    auto pending_buffer_queue = BaseQueueElement::create_queue(queue_size, shutdown_event);
+    CHECK_EXPECTED(pending_buffer_queue);
+
+    auto activation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(activation_event);
+
+    auto deactivation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(deactivation_event);
+
+    // We do not measure duration for Q elements
+    auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE);
+    CHECK_EXPECTED(duration_collector);
+
+    AccumulatorPtr queue_size_accumulator = nullptr;
+    if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) {
+        queue_size_accumulator = make_shared_nothrow<FullAccumulator<double>>("queue_size");
+        CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY);
+    }
+
+    auto is_empty = true; // UserBufferQueue always holds user buffers, therefore it's created empty
+    auto is_dma_able = false;
+    auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, flags, vstream_stats_flags, is_empty, is_dma_able);
+    CHECK_EXPECTED(buffer_pool);
+
+    auto queue_ptr = make_shared_nothrow<UserBufferQueueElement>(pending_buffer_queue.release(),
+        buffer_pool.release(), shutdown_event, name, timeout, duration_collector.release(),
+        std::move(queue_size_accumulator), std::move(pipeline_status), activation_event.release(),
+        deactivation_event.release());
+    CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating UserBufferQueueElement {} failed!", name);
+
+    LOGGER__INFO("Created {}", queue_ptr->description());
+
+    return queue_ptr;
+}
+
+Expected<std::shared_ptr<UserBufferQueueElement>> UserBufferQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params,
+    size_t frame_size, std::shared_ptr<std::atomic<hailo_status>> pipeline_status)
+{
+    return UserBufferQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms),
+        vstream_params.pipeline_elements_stats_flags,
vstream_params.vstream_stats_flags, frame_size, pipeline_status); +} + +UserBufferQueueElement::UserBufferQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, + EventPtr shutdown_event, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, + Event &&activation_event, Event &&deactivation_event) : + PullQueueElement(std::move(queue), buffer_pool, shutdown_event, name, timeout, std::move(duration_collector), + std::move(queue_size_accumulator), std::move(pipeline_status), std::move(activation_event), + std::move(deactivation_event)) +{} + +Expected UserBufferQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be valid in {}!", name()); + + hailo_status status = m_pool->enqueue_buffer(optional.as_view()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + CHECK_SUCCESS_AS_EXPECTED(status); + + auto output = m_queue.dequeue(m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + + CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", + name(), HAILO_TIMEOUT, m_timeout.count()); + CHECK_EXPECTED(output); + + CHECK_AS_EXPECTED(output->data() == optional.data(), HAILO_INTERNAL_FAILURE, "The buffer received in {} was not the same as the user buffer!", name()); + return output; +} + +hailo_status UserBufferQueueElement::set_buffer_pool_buffer_size(uint32_t frame_size) +{ + return m_pool->set_buffer_size(frame_size); +} + +hailo_status UserBufferQueueElement::run_in_thread() +{ + auto optional = m_pool->acquire_buffer(INIFINITE_TIMEOUT()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == optional.status()) { + LOGGER__INFO("Shutdown event was signaled in dequeue of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_EXPECTED_AS_STATUS(optional); + + auto buffer = next_pad().run_pull(optional.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + LOGGER__INFO("Shutdown event was signaled in run_pull of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + if (HAILO_STREAM_ABORT == buffer.status()) { + LOGGER__INFO("run_pull of {} was aborted!", name()); + return HAILO_STREAM_ABORT; + } + CHECK_EXPECTED_AS_STATUS(buffer); + + hailo_status status = m_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Shutdown event was signaled in enqueue of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +std::vector UserBufferQueueElement::get_queue_size_accumulators() +{ + return std::vector(); // Since this element is sync, queue state will always be 0 +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp new file mode 100644 index 00000000..4f86dafa --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp @@ -0,0 +1,179 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file queue_elements.hpp
+ * @brief All queue elements in the pipeline.
+ **/
+
+#ifndef _HAILO_QUEUE_ELEMENTS_HPP_
+#define _HAILO_QUEUE_ELEMENTS_HPP_
+
+#include "net_flow/pipeline/pipeline_internal.hpp"
+
+namespace hailort
+{
+
+class BaseQueueElement : public IntermediateElement
+{
+public:
+    virtual ~BaseQueueElement();
+
+    hailo_status set_timeout(std::chrono::milliseconds timeout);
+    virtual std::string description() const override;
+
+    static constexpr auto INIFINITE_TIMEOUT() { return std::chrono::milliseconds(HAILO_INFINITE); }
+
+    virtual BufferPoolPtr get_buffer_pool() const override
+    {
+        return m_pool;
+    }
+
+protected:
+    static Expected<SpscQueue<PipelineBuffer>> create_queue(size_t queue_size, EventPtr shutdown_event);
+    BaseQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool,
+        EventPtr shutdown_event, const std::string &name,
+        std::chrono::milliseconds timeout, DurationCollector &&duration_collector,
+        AccumulatorPtr &&queue_size_accumulator, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+        Event &&activation_event, Event &&deactivation_event,
+        PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline);
+
+    hailo_status pipeline_status();
+
+    virtual hailo_status execute_activate() override;
+    virtual hailo_status execute_post_deactivate(bool should_clear_abort) override;
+    virtual hailo_status execute_clear() override;
+    virtual hailo_status execute_clear_abort() override;
+
+    /// Starts/stops the queue thread. These functions need to be called in the subclass's ctor and dtor
+    /// because otherwise, if the thread were started/stopped in this class, we would face a pure-virtual call
+    /// to `run_in_thread`.
+    /// These functions don't return a status because they are meant to be called from ctors and dtors
+    virtual void start_thread();
+    virtual void stop_thread();
+
+    virtual std::vector<AccumulatorPtr> get_queue_size_accumulators() override;
+
+    virtual hailo_status run_in_thread() = 0;
+    virtual std::string thread_name() = 0;
+
+    SpscQueue<PipelineBuffer> m_queue;
+    EventPtr m_shutdown_event;
+    std::chrono::milliseconds m_timeout;
+    std::thread m_thread;
+    std::atomic_bool m_is_thread_running;
+    Event m_activation_event;
+    Event m_deactivation_event;
+    AccumulatorPtr m_queue_size_accumulator;
+    BufferPoolPtr m_pool;
+};
+
+class PushQueueElement : public BaseQueueElement
+{
+public:
+    static Expected<std::shared_ptr<PushQueueElement>> create(const std::string &name, std::chrono::milliseconds timeout,
+        size_t queue_size, size_t frame_size, hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vs_flags,
+        std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+        std::shared_ptr<AsyncPipeline> async_pipeline = nullptr);
+    static Expected<std::shared_ptr<PushQueueElement>> create(const std::string &name, const hailo_vstream_params_t &vstream_params,
+        size_t frame_size, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+        std::shared_ptr<AsyncPipeline> async_pipeline = nullptr);
+    PushQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name,
+        std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator,
+        std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, Event &&activation_event, Event &&deactivation_event,
+        std::shared_ptr<AsyncPipeline> async_pipeline, bool should_start_thread);
+    virtual ~PushQueueElement();
+
+    virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override;
+    virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override;
+    virtual Expected<PipelineBuffer> run_pull(PipelineBuffer &&optional, const PipelinePad &source) override;
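The `start_thread` / `stop_thread` contract documented in `BaseQueueElement` above guards against a classic C++ pitfall: during a base-class constructor the object's dynamic type is still the base, so a thread launched there could invoke the pure-virtual `run_in_thread()`. A minimal illustration of the hazard and of the pattern the subclasses follow (not HailoRT code):

```cpp
struct Base {
    Base() {
        // Starting the worker thread here would be wrong: if it ran before the
        // derived constructor finished, run_in_thread() would resolve to the
        // pure-virtual Base::run_in_thread() -> undefined behavior.
    }
    virtual ~Base() = default;
    virtual void run_in_thread() = 0;

protected:
    void start_thread() { /* spawn a thread that repeatedly calls run_in_thread() */ }
    void stop_thread()  { /* signal the thread to exit and join it */ }
};

struct Derived : Base {
    Derived() { start_thread(); }           // safe: the vtable now points at Derived
    ~Derived() override { stop_thread(); }  // stop before Derived's members are destroyed
    void run_in_thread() override { /* actual work */ }
};
```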
virtual PipelinePad &next_pad() override; + +protected: + virtual hailo_status execute_deactivate() override; + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { return "PUSH_QUEUE"; }; + virtual hailo_status execute_abort() override; +}; + +class AsyncPushQueueElement : public PushQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, size_t frame_size, bool is_empty, bool interacts_with_hw, hailo_pipeline_elem_stats_flags_t flags, + hailo_vstream_stats_flags_t vstream_stats_flags, EventPtr shutdown_event, + std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline, bool is_entry = false); + static Expected> create(const std::string &name, const ElementBuildParams &build_params, + size_t frame_size, bool is_empty, bool interacts_with_hw, std::shared_ptr async_pipeline, bool is_entry = false); + AsyncPushQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, + std::shared_ptr async_pipeline); + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; + virtual Expected can_push_buffer_downstream() override; + +protected: + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { return "ASYNC_PUSH_Q"; }; + virtual void start_thread() override; + virtual hailo_status execute_terminate(hailo_status error_status); + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_deactivate() override; +}; + +class PullQueueElement : public BaseQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, size_t frame_size, hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vstream_stats_flags, + std::shared_ptr> pipeline_status); + static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, + size_t frame_size, std::shared_ptr> pipeline_status); + PullQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event); + virtual ~PullQueueElement(); + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + +protected: + virtual hailo_status execute_deactivate() override; + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { return "PULL_QUEUE"; }; +}; + +class UserBufferQueueElement : public PullQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + hailo_pipeline_elem_stats_flags_t flags, 
hailo_vstream_stats_flags_t vstream_stats_flags, + size_t frame_size, std::shared_ptr> pipeline_status); + static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, + size_t frame_size, std::shared_ptr> pipeline_status); + UserBufferQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, + EventPtr shutdown_event, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, Event &&activation_event, + Event &&deactivation_event); + + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + hailo_status set_buffer_pool_buffer_size(uint32_t frame_size); + +virtual std::vector get_queue_size_accumulators() override; + +protected: + virtual hailo_status run_in_thread() override; +}; + + + + +} /* namespace hailort */ + +#endif /* _HAILO_QUEUE_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp index 8530e135..02a1c1f1 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp @@ -19,16 +19,6 @@ #include "hailo/hailort_common.hpp" #include "net_flow/pipeline/pipeline_internal.hpp" #include "stream_common/stream_internal.hpp" -#include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/ssd_post_process.hpp" -#include "net_flow/ops/yolox_post_process.hpp" -#include "net_flow/ops/yolov8_post_process.hpp" -#include "net_flow/ops/yolov5_post_process.hpp" -#include "net_flow/ops/argmax_post_process.hpp" -#include "net_flow/ops/softmax_post_process.hpp" -#include "net_flow/ops/yolov5_seg_post_process.hpp" - -#include "common/runtime_statistics_internal.hpp" #include "net_flow/pipeline/vstream_internal.hpp" #include @@ -52,2134 +42,711 @@ static std::map get_pipeline_accumulators_by_type( static std::map> get_pipeline_queue_size_accumulators( const std::vector> &pipeline); -Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_dma_able, std::shared_ptr async_pipeline) +BaseVStream::BaseVStream(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, + std::shared_ptr pipeline_entry, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, + AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, + hailo_status &output_status) : + m_vstream_info(vstream_info), + m_quant_infos(quant_infos), + m_vstream_params(vstream_params), + m_measure_pipeline_latency((vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_LATENCY) != 0), + m_entry_element(pipeline_entry), + m_pipeline(std::move(pipeline)), + m_is_activated(false), + m_is_aborted(false), + m_pipeline_status(std::move(pipeline_status)), + m_core_op_activated_event(std::move(core_op_activated_event)), + m_fps_accumulators(get_pipeline_accumulators_by_type(m_pipeline, AccumulatorType::FPS)), + 
m_latency_accumulators(get_pipeline_accumulators_by_type(m_pipeline, AccumulatorType::LATENCY)), + m_queue_size_accumulators(get_pipeline_queue_size_accumulators(m_pipeline)), + m_pipeline_latency_accumulator(pipeline_latency_accumulator) { - auto transform_context = InputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, - dst_quant_infos); - CHECK_EXPECTED(transform_context, "Failed Creating InputTransformContext"); - - bool is_empty = false; - auto buffer_pool = BufferPool::create(transform_context.value()->get_dst_frame_size(), buffer_pool_size, shutdown_event, elem_flags, - vstream_flags, is_empty, is_dma_able); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool for {}", name); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto pre_infer_elem_ptr = make_shared_nothrow(transform_context.release(), - buffer_pool.release(), name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, - async_pipeline); - CHECK_AS_EXPECTED(nullptr != pre_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", pre_infer_elem_ptr->name()); - - return pre_infer_elem_ptr; + output_status = start_vstream(); } -Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const std::string &name, - const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_dma_able, std::shared_ptr async_pipeline) +BaseVStream::BaseVStream(BaseVStream &&other) noexcept : + m_vstream_info(std::move(other.m_vstream_info)), + m_vstream_params(std::move(other.m_vstream_params)), + m_measure_pipeline_latency(std::move(other.m_measure_pipeline_latency)), + m_entry_element(std::move(other.m_entry_element)), + m_pipeline(std::move(other.m_pipeline)), + m_is_activated(std::exchange(other.m_is_activated, false)), + m_is_aborted(std::exchange(other.m_is_aborted, false)), + m_pipeline_status(std::move(other.m_pipeline_status)), + m_core_op_activated_event(std::move(other.m_core_op_activated_event)), + m_fps_accumulators(std::move(other.m_fps_accumulators)), + m_latency_accumulators(std::move(other.m_latency_accumulators)), + m_queue_size_accumulators(std::move(other.m_queue_size_accumulators)), + m_pipeline_latency_accumulator(std::move(other.m_pipeline_latency_accumulator)) +{} + +BaseVStream& BaseVStream::operator=(BaseVStream &&other) noexcept { - return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, - std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, - vstream_params.vstream_stats_flags, shutdown_event, pipeline_status, pipeline_direction, is_dma_able, async_pipeline); + if (this != &other) { + // operator= is used only for vstream creation BEFORE activation. 
otherwise we should deactivate vstream here + assert(!m_is_activated); + m_vstream_info = std::move(other.m_vstream_info); + m_quant_infos = std::move(other.m_quant_infos); + m_vstream_params = std::move(other.m_vstream_params); + m_measure_pipeline_latency = std::move(other.m_measure_pipeline_latency); + m_entry_element = std::move(other.m_entry_element); + m_pipeline = std::move(other.m_pipeline); + m_is_activated = std::exchange(other.m_is_activated, false); + m_is_aborted = std::exchange(other.m_is_aborted, false); + m_pipeline_status = std::move(other.m_pipeline_status); + m_core_op_activated_event = std::move(other.m_core_op_activated_event); + m_fps_accumulators = std::move(other.m_fps_accumulators); + m_latency_accumulators = std::move(other.m_latency_accumulators); + m_queue_size_accumulators = std::move(other.m_queue_size_accumulators); + m_pipeline_latency_accumulator = std::move(other.m_pipeline_latency_accumulator); + } + return *this; } -Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_dma_able, - std::shared_ptr async_pipeline) +hailo_status BaseVStream::start_vstream() { - return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, - build_params.timeout, build_params.buffer_pool_size_internal, build_params.elem_stats_flags, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_dma_able, async_pipeline); -} + auto status = resume(); + CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, + "Failed to resume stream in {}", name()); -PreInferElement::PreInferElement(std::unique_ptr &&transform_context, BufferPoolPtr buffer_pool, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_transform_context(std::move(transform_context)) -{} + LOGGER__DEBUG("Activating {}...", name()); + status = m_entry_element->activate(); + CHECK_SUCCESS(status); -Expected PreInferElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) -{ - LOGGER__ERROR("PreInferElement does not support run_pull operation"); - return make_unexpected(HAILO_INVALID_OPERATION); + m_is_activated = true; + return HAILO_SUCCESS; } -PipelinePad &PreInferElement::next_pad() +hailo_status BaseVStream::abort() { - // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed) - return *m_sources[0].next(); -} + auto status = m_entry_element->abort(); + CHECK_SUCCESS(status); + m_is_aborted = true; -std::string PreInferElement::description() const -{ - std::stringstream element_description; - element_description << "(" << this->name() << " | " << m_transform_context->description() << ")"; - return element_description.str(); + return HAILO_SUCCESS; } -Expected PreInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status BaseVStream::resume() { - if (PipelineBuffer::Type::FLUSH == input.get_type()) { - return std::move(input); - } + auto status = m_entry_element->clear_abort(); + CHECK_SUCCESS(status); + m_is_aborted = false; - auto transformed_buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == transformed_buffer.status()) { - return make_unexpected(transformed_buffer.status()); - } - - if (!transformed_buffer) { - input.get_exec_done_cb()(transformed_buffer.status()); + if (m_is_activated) { + status = m_entry_element->activate(); + CHECK_SUCCESS(status); } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != transformed_buffer.status(), HAILO_TIMEOUT, - "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); - CHECK_EXPECTED(transformed_buffer); - - auto dst = transformed_buffer->as_view(); - m_duration_collector.start_measurement(); - const auto status = m_transform_context->transform(input.as_view(), dst); - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(status); - transformed_buffer->set_action_status(status); - - auto metadata = input.get_metadata(); - - CHECK_SUCCESS_AS_EXPECTED(status); + return HAILO_SUCCESS; +} - // Note: The latency to be measured starts as the input buffer is sent to the InputVStream (via write()) - transformed_buffer->set_metadata(std::move(metadata)); +hailo_status BaseVStream::stop_vstream() +{ + hailo_status status = HAILO_SUCCESS; + if (m_is_activated) { + m_is_activated = false; + status = m_entry_element->deactivate(); + if (HAILO_SUCCESS != status) { + LOGGER__WARNING("Failed deactivate of vstream {} status {}", name(), status); + } - return transformed_buffer.release(); + // If VStream was aborted, do not clear low-level stream abortion, + // otherwise flush would be called on low-level stream d-tor when there is no receiver. 
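+        // Illustration of the rule above (descriptive comment only, no behavior change):
+        //   abort()  sets m_is_aborted = true  -> should_clear_abort == false,
+        //            so the low-level stream keeps its aborted state across deactivation;
+        //   resume() sets m_is_aborted = false -> should_clear_abort == true.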
+        auto should_clear_abort = (!m_is_aborted);
+        status = m_entry_element->post_deactivate(should_clear_abort);
+        if (HAILO_SUCCESS != status) {
+            LOGGER__WARNING("Failed post deactivate of vstream {}, status {}", name(), status);
+        }
+    }
+    return status;
 }
 
-Expected> ConvertNmsToDetectionsElement::create(
-    const hailo_nms_info_t &nms_info, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
-    std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout,
-    hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, size_t buffer_pool_size,
-    PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline)
+hailo_status BaseVStream::stop_and_clear()
 {
-    // The actual data will be in the metadata
-    auto frame_size = 0;
-    auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element);
-    CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name);
-    auto buffer_pool = buffer_pool_expected.release();
-
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
+    auto status = HAILO_SUCCESS;
+    if (nullptr != m_core_op_activated_event) {
+        status = m_core_op_activated_event->wait(std::chrono::milliseconds(0));
+        CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION,
+            "Trying to clear {} vstream before its network group is deactivated", name());
+    }
 
-    auto convert_nms_to_detections_elem_ptr = make_shared_nothrow(std::move(nms_info),
-        name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline);
-    CHECK_AS_EXPECTED(nullptr != convert_nms_to_detections_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+    status = stop_vstream();
+    CHECK_SUCCESS(status);
 
-    LOGGER__INFO("Created {}", convert_nms_to_detections_elem_ptr->name());
+    status = m_entry_element->clear();
+    CHECK_SUCCESS(status, "Failed clearing vstream {}", name());
 
-    return convert_nms_to_detections_elem_ptr;
-}
+    const auto curr_pipeline_status = m_pipeline_status->load();
+    if (HAILO_SUCCESS != curr_pipeline_status) {
+        LOGGER__TRACE("Overwriting current pipeline status {}", curr_pipeline_status);
+        m_pipeline_status->store(HAILO_SUCCESS);
+    }
 
-Expected> ConvertNmsToDetectionsElement::create(
-    const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params,
-    PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline)
-{
-    return ConvertNmsToDetectionsElement::create(nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status,
-        build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.buffer_pool_size_edges,
-        pipeline_direction, is_last_copy_element, async_pipeline);
+    return status;
 }
 
-ConvertNmsToDetectionsElement::ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name,
-    DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool,
-    std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) :
-    FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline),
-    m_nms_info(std::move(nms_info))
-{}
-
-hailo_status ConvertNmsToDetectionsElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink)
+hailo_status BaseVStream::before_fork()
 {
-
CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "ConvertNmsToDetectionsElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); + return HAILO_SUCCESS; } -PipelinePad &ConvertNmsToDetectionsElement::next_pad() +hailo_status BaseVStream::after_fork_in_parent() { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + return HAILO_SUCCESS; } -std::string ConvertNmsToDetectionsElement::description() const +hailo_status BaseVStream::after_fork_in_child() { - std::stringstream element_description; - element_description << "(" << this->name() << ")"; - return element_description.str(); + return HAILO_SUCCESS; } -Expected ConvertNmsToDetectionsElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +size_t BaseVStream::get_frame_size() const { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } - - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); - } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); - - buffer->set_metadata(input.get_metadata()); - - m_duration_collector.start_measurement(); - - auto detections_pair = net_flow::NmsPostProcessOp::transform__d2h_NMS_DETECTIONS(input.data(), m_nms_info); - auto detections_pipeline_data = make_shared_nothrow - (std::move(detections_pair.first),std::move(detections_pair.second)); - buffer->set_additional_data(detections_pipeline_data); - - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(HAILO_SUCCESS); - - return buffer.release(); + return HailoRTCommon::get_frame_size(m_vstream_info, m_vstream_params.user_buffer_format); } -Expected> FillNmsFormatElement::create(const hailo_nms_info_t nms_info, - const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +const hailo_vstream_info_t &BaseVStream::get_info() const { - auto frame_size = HailoRTCommon::get_nms_host_frame_size(nms_info, dst_format); - auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name); - auto buffer_pool = buffer_pool_expected.release(); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto fill_nms_format_element = make_shared_nothrow(std::move(nms_config), - name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != fill_nms_format_element, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", fill_nms_format_element->name()); - - return fill_nms_format_element; + return m_vstream_info; } -Expected> FillNmsFormatElement::create(const hailo_nms_info_t nms_info, - const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - const ElementBuildParams 
&build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +const std::vector &BaseVStream::get_quant_infos() const { - return FillNmsFormatElement::create(nms_info, dst_format, nms_config, name, build_params.elem_stats_flags, - build_params.pipeline_status, build_params.timeout, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.buffer_pool_size_edges, pipeline_direction, is_last_copy_element, - async_pipeline); + return m_quant_infos; } -FillNmsFormatElement::FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_nms_config(std::move(nms_config)) -{} - -hailo_status FillNmsFormatElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +const hailo_format_t &BaseVStream::get_user_buffer_format() const { - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "FillNmsFormatElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); + return m_vstream_params.user_buffer_format; } -PipelinePad &FillNmsFormatElement::next_pad() +std::string BaseVStream::name() const { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + return std::string(m_vstream_info.name); } -std::string FillNmsFormatElement::description() const +std::string BaseVStream::network_name() const { - std::stringstream element_description; - element_description << "(" << this->name() << ")"; - return element_description.str(); + return std::string(m_vstream_info.network_name); } -Expected FillNmsFormatElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +const std::map &BaseVStream::get_fps_accumulators() const { - auto buffer_expected = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer_expected.status()) { - return make_unexpected(buffer_expected.status()); - } - - if (!buffer_expected) { - input.get_exec_done_cb()(buffer_expected.status()); - } - CHECK_EXPECTED(buffer_expected, "{} (D2H) failed with status={}", name(), buffer_expected.status()); - auto buffer = buffer_expected.release(); - - buffer.set_metadata(input.get_metadata()); - - m_duration_collector.start_measurement(); - - auto detections = input.get_metadata().get_additional_data(); - auto dst = buffer.as_view(); - net_flow::NmsPostProcessOp::fill_nms_format_buffer(dst, detections->m_detections, detections->m_detections_classes_count, - m_nms_config); - - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(HAILO_SUCCESS); - - return buffer; + return m_fps_accumulators; } -Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, 
hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, size_t buffer_pool_size, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +const std::map &BaseVStream::get_latency_accumulators() const { - auto frame_size = (dst_format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ? HailoRTCommon::get_nms_host_frame_size(nms_info, dst_format) : HailoRTCommon::get_frame_size(dst_image_shape, dst_format); - auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name); - - auto transform_context = OutputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, - dst_quant_infos, nms_info); - CHECK_EXPECTED(transform_context, "Failed Creating OutputTransformContext"); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto post_infer_elem_ptr = make_shared_nothrow(transform_context.release(), name, - duration_collector.release(), std::move(pipeline_status), buffer_pool_expected.release(), timeout, pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != post_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", post_infer_elem_ptr->name()); - - return post_infer_elem_ptr; + return m_latency_accumulators; } -Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, - const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, - EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +const std::map> &BaseVStream::get_queue_size_accumulators() const { - return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, nms_info, - name, vstream_params.pipeline_elements_stats_flags, pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.vstream_stats_flags, shutdown_event, vstream_params.queue_size, pipeline_direction, is_last_copy_element, async_pipeline); + return m_queue_size_accumulators; } -Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +AccumulatorPtr BaseVStream::get_pipeline_latency_accumulator() const { - return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, - dst_quant_infos, nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status, - build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.buffer_pool_size_edges, - pipeline_direction, is_last_copy_element, async_pipeline); + return m_pipeline_latency_accumulator; } -PostInferElement::PostInferElement(std::unique_ptr &&transform_context, const std::string &name, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - 
BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_transform_context(std::move(transform_context)) -{} -Expected PostInferElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +const std::vector> &BaseVStream::get_pipeline() const { - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "PostInferElement {} does not support run_pull operation", name() - ); - return FilterElement::run_pull(std::move(optional), source); + return m_pipeline; } -hailo_status PostInferElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +Expected InputVStream::create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, + const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, + std::shared_ptr pipeline_exit, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, + AccumulatorPtr pipeline_latency_accumulator) { - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "PostInferElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); -} + auto vstream_internal = InputVStreamInternal::create(vstream_info, quant_infos, vstream_params, pipeline_entry, pipeline_exit, + std::move(pipeline), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator); + CHECK_EXPECTED(vstream_internal); -PipelinePad &PostInferElement::next_pad() -{ - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + InputVStream vstream(vstream_internal.release()); + return vstream; } -std::string PostInferElement::description() const +hailo_status InputVStream::write(const MemoryView &buffer) { - std::stringstream element_description; - element_description << "(" << this->name() << " | " << m_transform_context->description() << ")"; - return element_description.str(); + return m_vstream->write(std::move(buffer)); } -Expected PostInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status InputVStream::write(const hailo_pix_buffer_t &buffer) { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } + CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer.memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!"); - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); + // If only one plane is passed, address it as memview + if (1 == buffer.number_of_planes) { + return write(MemoryView(buffer.planes[0].user_ptr, buffer.planes[0].bytes_used)); } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); - // Note: The latency to be measured starts as the buffer is read from the HW (it's 'input' in this case) - buffer->set_metadata(input.get_metadata()); - - auto dst = buffer->as_view(); - m_duration_collector.start_measurement(); - const auto status = m_transform_context->transform(input.as_view(), dst); - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(status); - buffer->set_action_status(status); + // If model is multi 
planar, pass the pix buffer
+    if (m_vstream->is_multi_planar()){
+        return m_vstream->write(buffer);
+    }
 
-    CHECK_SUCCESS_AS_EXPECTED(status);
+    // Other cases - allocate a contiguous buffer to hold all planes
+    bool is_contiguous = true;
+    uint32_t planes_total_size = 0;
+    /* assuming contiguous memory. If not, this will be overridden by the coming loop */
+    void *data_ptr = buffer.planes[0].user_ptr;
 
-    return buffer.release();
-}
+    /* calculate total data size by summing the planes' sizes and check if the planes are contiguous */
+    for (uint32_t plane_index = 0; plane_index < buffer.number_of_planes; plane_index++){
+        auto &plane = buffer.planes[plane_index];
+        planes_total_size += plane.bytes_used;
 
-static hailo_nms_info_t fuse_nms_info(const std::vector &nms_infos)
-{
-    hailo_nms_info_t fused_info = nms_infos[0];
-    fused_info.is_defused = false;
-    fused_info.number_of_classes = 0;
-    for (const auto &nms_info : nms_infos) {
-        fused_info.number_of_classes += nms_info.number_of_classes;
-        assert(nms_infos[0].max_bboxes_per_class == nms_info.max_bboxes_per_class);
-        assert(nms_infos[0].bbox_size == nms_info.bbox_size);
-        assert(nms_infos[0].chunks_per_frame == nms_info.chunks_per_frame);
-        assert(nms_infos[0].burst_size == nms_info.burst_size);
-        assert(nms_infos[0].burst_type == nms_info.burst_type);
+        if (is_contiguous && (plane_index + 1 < buffer.number_of_planes)){
+            auto &next_plane = buffer.planes[plane_index+1];
+            if ((static_cast(plane.user_ptr) + plane.bytes_used) != next_plane.user_ptr){
+                is_contiguous = false;
+            }
+        }
     }
 
-    return fused_info;
-}
 
-Expected> RemoveOverlappingBboxesElement::create(
-    const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
-    std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags,
-    EventPtr shutdown_event, size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element,
-    std::shared_ptr async_pipeline)
-{
-    // The actual data will be in the metadata
-    auto frame_size = 0;
-    auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element);
-    CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name);
-    auto buffer_pool = buffer_pool_expected.release();
-
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
+    BufferPtr contiguous_buffer = nullptr;
+    if (!
is_contiguous) { + /* copy to a contiguous buffer, and then pass it */ + auto expected_buffer = Buffer::create_shared(planes_total_size); + CHECK_EXPECTED_AS_STATUS(expected_buffer); + contiguous_buffer = expected_buffer.release(); + uint32_t copied_bytes = 0; - auto convert_nms_removed_overlapping_elem_ptr = make_shared_nothrow(std::move(nms_config), - name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != convert_nms_removed_overlapping_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + for (uint32_t plane_index = 0; plane_index < buffer.number_of_planes; plane_index++){ + auto &plane = buffer.planes[plane_index]; + std::memcpy(contiguous_buffer->data() + copied_bytes, plane.user_ptr, plane.bytes_used); + copied_bytes += plane.bytes_used; + } - LOGGER__INFO("Created {}", convert_nms_removed_overlapping_elem_ptr->name()); + data_ptr = contiguous_buffer->data(); + } - return convert_nms_removed_overlapping_elem_ptr; + return m_vstream->write(std::move(MemoryView(data_ptr, planes_total_size))); } -Expected> RemoveOverlappingBboxesElement::create(const net_flow::NmsPostProcessConfig nms_config, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status InputVStream::flush() { - return RemoveOverlappingBboxesElement::create(nms_config, name, - build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.buffer_pool_size_edges, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->flush(); } -RemoveOverlappingBboxesElement::RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_nms_config(std::move(nms_config)) -{} - -hailo_status RemoveOverlappingBboxesElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) -{ - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "RemoveOverlappingBboxesElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); -} - -PipelinePad &RemoveOverlappingBboxesElement::next_pad() +hailo_status InputVStream::clear(std::vector &vstreams) { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); + for (auto &vstream : vstreams) { + auto status = vstream.stop_and_clear(); + CHECK_SUCCESS(status); + } + for (auto &vstream : vstreams) { + auto status = vstream.start_vstream(); + CHECK_SUCCESS(status); } - return *m_sinks[0].prev(); -} -std::string RemoveOverlappingBboxesElement::description() const -{ - std::stringstream element_description; - element_description << "(" << this->name() << ")"; - return element_description.str(); + return HAILO_SUCCESS; } -Expected RemoveOverlappingBboxesElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status InputVStream::clear(std::vector> &vstreams) { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if 
(HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); + for (auto &vstream : vstreams) { + auto status = vstream.get().stop_and_clear(); + CHECK_SUCCESS(status); } - - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); + for (auto &vstream : vstreams) { + auto status = vstream.get().start_vstream(); + CHECK_SUCCESS(status); } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); - - buffer->set_metadata(input.get_metadata()); - m_duration_collector.start_measurement(); - auto detections_pipeline_data = input.get_metadata().get_additional_data(); - - net_flow::NmsPostProcessOp::remove_overlapping_boxes(detections_pipeline_data->m_detections, - detections_pipeline_data->m_detections_classes_count, m_nms_config.nms_iou_th); - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(HAILO_SUCCESS); - - return buffer.release(); + return HAILO_SUCCESS; } -Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, - hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status InputVStream::abort() { - assert(nms_op->outputs_metadata().size() == 1); - auto vstream_info = nms_op->metadata()->get_output_vstream_info(); - CHECK_EXPECTED(vstream_info); - - auto buffer_size = HailoRTCommon::get_nms_host_frame_size(nms_op->metadata()->get_output_vstream_info()->nms_shape, - nms_op->outputs_metadata().begin()->second.format); - - auto buffer_pool = BufferPool::create(buffer_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool"); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto nms_elem_ptr = make_shared_nothrow(nms_op, buffer_pool.release(), - name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", nms_elem_ptr->name()); - return nms_elem_ptr; + return m_vstream->abort(); } -Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status InputVStream::resume() { - return NmsPostProcessMuxElement::create(nms_op, name, build_params.timeout, - build_params.buffer_pool_size_edges, build_params.elem_stats_flags, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->resume(); } -Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, - const std::string &name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +size_t InputVStream::get_frame_size() const { - return NmsPostProcessMuxElement::create(nms_op, name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.queue_size, 
vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags, shutdown_event, - pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->get_frame_size(); } -NmsPostProcessMuxElement::NmsPostProcessMuxElement(std::shared_ptr nms_op, BufferPoolPtr &&pool, - const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - BaseMuxElement(nms_op->inputs_metadata().size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), - std::move(pool), pipeline_direction, async_pipeline), - m_nms_op(nms_op) -{} - -std::vector NmsPostProcessMuxElement::get_queue_size_accumulators() +const hailo_vstream_info_t &InputVStream::get_info() const { - if (nullptr == m_pool->get_queue_size_accumulator()) { - return std::vector(); - } - return {m_pool->get_queue_size_accumulator()}; + return m_vstream->get_info(); } -Expected NmsPostProcessMuxElement::action(std::vector &&input_buffers, PipelineBuffer &&optional) +const std::vector &InputVStream::get_quant_infos() const { - std::map inputs; - std::map outputs; - for (size_t i = 0; i < input_buffers.size(); ++i) { - inputs.insert({m_sinks_names[i], input_buffers[i].as_view()}); - } - auto acquired_buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { - return make_unexpected(acquired_buffer.status()); - } - - if (!acquired_buffer) { - for (auto &input : input_buffers) { - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(acquired_buffer.status()); - } - } - CHECK_EXPECTED(acquired_buffer); - outputs.insert({"", acquired_buffer->as_view()}); // TODO: fill with correct name - m_duration_collector.start_measurement(); - - auto post_process_result = m_nms_op->execute(inputs, outputs); - m_duration_collector.complete_measurement(); - - for (auto &input : input_buffers) { - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(post_process_result); - } - acquired_buffer->set_action_status(post_process_result); - - CHECK_SUCCESS_AS_EXPECTED(post_process_result); - return acquired_buffer; + return m_vstream->get_quant_infos(); } -Expected> NmsMuxElement::create(const std::vector &nms_infos, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, - hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +const hailo_format_t &InputVStream::get_user_buffer_format() const { - const auto &fused_info = fuse_nms_info(nms_infos); - auto buffer_pool = BufferPool::create(HailoRTCommon::get_nms_hw_frame_size(fused_info), - buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool"); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto nms_elem_ptr = make_shared_nothrow(nms_infos, fused_info, buffer_pool.release(), - name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", nms_elem_ptr->name()); - - return nms_elem_ptr; + return 
m_vstream->get_user_buffer_format(); } -Expected> NmsMuxElement::create(const std::vector &nms_infos, const std::string &name, - const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +std::string InputVStream::name() const { - return NmsMuxElement::create(nms_infos, name, std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, - vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags, shutdown_event, pipeline_status, pipeline_direction, - is_last_copy_element, async_pipeline); + return m_vstream->name(); } -Expected> NmsMuxElement::create(const std::vector &nms_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +std::string InputVStream::network_name() const { - return NmsMuxElement::create(nms_infos, name, build_params.timeout, build_params.buffer_pool_size_edges, build_params.elem_stats_flags, - build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, - async_pipeline); + return m_vstream->network_name(); } -NmsMuxElement::NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, BufferPoolPtr &&pool, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - BaseMuxElement(nms_infos.size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), std::move(pool), pipeline_direction, async_pipeline), - m_nms_infos(nms_infos), - m_fused_nms_info(fused_nms_info) -{} - -const hailo_nms_info_t &NmsMuxElement::get_fused_nms_info() const +const std::map &InputVStream::get_fps_accumulators() const { - return m_fused_nms_info; + return m_vstream->get_fps_accumulators(); } -std::vector NmsMuxElement::get_queue_size_accumulators() +const std::map &InputVStream::get_latency_accumulators() const { - if (nullptr == m_pool->get_queue_size_accumulator()) { - return std::vector(); - } - return {m_pool->get_queue_size_accumulator()}; + return m_vstream->get_latency_accumulators(); } -Expected NmsMuxElement::action(std::vector &&inputs, PipelineBuffer &&optional) +const std::map> &InputVStream::get_queue_size_accumulators() const { - std::vector input_views; + return m_vstream->get_queue_size_accumulators(); +} - input_views.reserve(inputs.size()); - for (auto &input_buf : inputs) { - input_views.push_back(input_buf.as_view()); - } +AccumulatorPtr InputVStream::get_pipeline_latency_accumulator() const +{ + return m_vstream->get_pipeline_latency_accumulator(); +} - auto acquired_buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { - return make_unexpected(acquired_buffer.status()); - } +const std::vector> &InputVStream::get_pipeline() const +{ + return m_vstream->get_pipeline(); +} - if (!acquired_buffer) { - for (auto &input : inputs) { - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(acquired_buffer.status()); - } - } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != acquired_buffer.status(), HAILO_TIMEOUT, - "{} failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); - CHECK_EXPECTED(acquired_buffer); - - 
m_duration_collector.start_measurement(); - const auto status = fuse_buffers(input_views, m_nms_infos, acquired_buffer.value().as_view()); - m_duration_collector.complete_measurement(); - - for (auto &input : inputs) { - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(status); - } - acquired_buffer->set_action_status(status); +hailo_status InputVStream::start_vstream() +{ + return m_vstream->start_vstream(); +} - CHECK_SUCCESS_AS_EXPECTED(status); +hailo_status InputVStream::stop_vstream() +{ + return m_vstream->stop_vstream(); +} - return acquired_buffer.release(); +hailo_status InputVStream::stop_and_clear() +{ + return m_vstream->stop_and_clear(); } -Expected> TransformDemuxElement::create(std::shared_ptr demuxer, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +std::string InputVStream::get_pipeline_description() const { - std::vector pools; - pools.reserve(demuxer->get_edges_stream_info().size()); - for (const auto& mux_edge : demuxer->get_edges_stream_info()) { - auto buffer_pool = BufferPool::create(mux_edge.hw_frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool"); - pools.push_back(buffer_pool.release()); - } + return m_vstream->get_pipeline_description(); +} - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); +bool InputVStream::is_aborted() +{ + return m_vstream->is_aborted(); +} +bool InputVStream::is_multi_planar() +{ + return m_vstream->is_multi_planar(); +} - auto demux_elem_ptr = make_shared_nothrow(demuxer, std::move(pools), name, timeout, - duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != demux_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - return demux_elem_ptr; +hailo_status InputVStream::before_fork() +{ + return m_vstream->before_fork(); } -Expected> TransformDemuxElement::create(std::shared_ptr demuxer, - const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +hailo_status InputVStream::after_fork_in_parent() { - return TransformDemuxElement::create(demuxer, name, build_params.timeout, build_params.buffer_pool_size_edges, build_params.elem_stats_flags, - build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->after_fork_in_parent(); } -TransformDemuxElement::TransformDemuxElement(std::shared_ptr demuxer, std::vector &&pools, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - BaseDemuxElement(demuxer->get_edges_stream_info().size(), name, timeout, std::move(duration_collector), - std::move(pipeline_status), std::move(pools), pipeline_direction, async_pipeline), - m_demuxer(demuxer) -{} - -std::vector TransformDemuxElement::get_queue_size_accumulators() +hailo_status InputVStream::after_fork_in_child() { - std::vector result; - for (const auto &pool : m_pools) { - if 
(nullptr != pool->get_queue_size_accumulator()) { - result.emplace_back(pool->get_queue_size_accumulator()); - } - } - return result; + return m_vstream->after_fork_in_child(); } -Expected> TransformDemuxElement::action(PipelineBuffer &&input) +InputVStream::InputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} + +Expected OutputVStream::create( + const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, + std::shared_ptr pipeline_entry, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, + EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator) { - std::vector outputs; - std::vector raw_buffers; + auto vstream_internal = OutputVStreamInternal::create(vstream_info, quant_infos, vstream_params, pipeline_entry, + std::move(pipeline), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator); + CHECK_EXPECTED(vstream_internal); - auto mux_edges = m_demuxer->get_edges_stream_info(); - outputs.reserve(mux_edges.size()); - raw_buffers.reserve(mux_edges.size()); + OutputVStream vstream(vstream_internal.release()); + return vstream; +} - for (uint32_t i = 0; i < mux_edges.size(); i++) { - auto acquired_buffer = m_pools[i]->acquire_buffer(m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { - return make_unexpected(acquired_buffer.status()); - } +hailo_status OutputVStream::read(MemoryView buffer) +{ + return m_vstream->read(std::move(buffer)); +} - if (!acquired_buffer) { - input.get_exec_done_cb()(acquired_buffer.status()); - } - CHECK_EXPECTED(acquired_buffer, "Failed to acquire buffer"); - outputs.emplace_back(acquired_buffer.release()); - raw_buffers.push_back(outputs.back().as_view()); +hailo_status OutputVStream::clear(std::vector &vstreams) +{ + for (auto &vstream : vstreams) { + auto status = vstream.stop_and_clear(); + CHECK_SUCCESS(status); } - - m_duration_collector.start_measurement(); - const auto status = m_demuxer->transform_demux(input.as_view(), raw_buffers); - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(status); - for (auto &output : outputs) { - output.set_action_status(status); + for (auto &vstream : vstreams) { + auto status = vstream.start_vstream(); + CHECK_SUCCESS(status); } - CHECK_SUCCESS_AS_EXPECTED(status); - - return outputs; + return HAILO_SUCCESS; } -PixBufferElement::PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - hailo_format_order_t order, std::shared_ptr async_pipeline) : - BaseDemuxElement(((order == HAILO_FORMAT_ORDER_I420) ? 
NUMBER_OF_PLANES_I420 : NUMBER_OF_PLANES_NV12_NV21), - name, timeout, std::move(duration_collector), std::move(pipeline_status), - {}, PipelineDirection::PUSH, async_pipeline), - m_order(order) -{} - -Expected PixBufferElement::can_push_buffer_upstream(const std::string &pad_name) +hailo_status OutputVStream::abort() { - return m_sinks[0].prev()->element().can_push_buffer_upstream(pad_name); + return m_vstream->abort(); } -Expected> PixBufferElement::create(const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> pipeline_status, hailo_format_order_t order, - std::shared_ptr async_pipeline) +hailo_status OutputVStream::resume() { - auto pix_buffer_splitter_elem_ptr = make_shared_nothrow(name, timeout, - std::move(duration_collector), std::move(pipeline_status), order, async_pipeline); - CHECK_AS_EXPECTED(nullptr != pix_buffer_splitter_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - return pix_buffer_splitter_elem_ptr; + return m_vstream->resume(); } -Expected> PixBufferElement::action(PipelineBuffer &&input) +hailo_status OutputVStream::clear(std::vector> &vstreams) { - // splits the planes into buffers - m_duration_collector.start_measurement(); - std::vector outputs; - - auto input_pix_buffer_expected = input.as_hailo_pix_buffer(m_order); - - if (!input_pix_buffer_expected) { - input.get_exec_done_cb()(input_pix_buffer_expected.status()); + for (auto &vstream : vstreams) { + auto status = vstream.get().stop_and_clear(); + CHECK_SUCCESS(status); } - CHECK_EXPECTED(input_pix_buffer_expected); - auto input_pix_buffer = input_pix_buffer_expected.release(); - - if (PipelineBuffer::Type::FLUSH == input.get_type()) { - for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { - outputs.emplace_back(PipelineBuffer(PipelineBuffer::Type::FLUSH)); - } - } else { - auto shared_counter = make_shared_nothrow(input_pix_buffer.number_of_planes); - if (!shared_counter) { - input.get_exec_done_cb()(HAILO_OUT_OF_HOST_MEMORY); - } - CHECK_NOT_NULL_AS_EXPECTED(shared_counter, HAILO_OUT_OF_HOST_MEMORY); - - for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { - outputs.emplace_back(MemoryView(input_pix_buffer.planes[i].user_ptr, input_pix_buffer.planes[i].bytes_used), - [shared_counter, input_cb = input.get_exec_done_cb()](hailo_status status) - { - if (--*shared_counter == 0) { - input_cb(status); - } - }); - } + for (auto &vstream : vstreams) { + auto status = vstream.get().start_vstream(); + CHECK_SUCCESS(status); } - m_duration_collector.complete_measurement(); - return outputs; + return HAILO_SUCCESS; } -Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, - const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, - std::shared_ptr> pipeline_status, - size_t buffer_pool_size, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, - EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +size_t OutputVStream::get_frame_size() const { - auto out_metadata = argmax_op->outputs_metadata().begin()->second; - auto buffer_size = HailoRTCommon::get_frame_size(out_metadata.shape, out_metadata.format); - auto buffer_pool = BufferPool::create(buffer_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool for {}", name); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - 
auto argmax_elem_ptr = make_shared_nothrow(argmax_op, - name, duration_collector.release(), std::move(pipeline_status), timeout, buffer_pool.release(), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != argmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - LOGGER__INFO("Created {}", argmax_elem_ptr->name()); - return argmax_elem_ptr; + return m_vstream->get_frame_size(); } -Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +const hailo_vstream_info_t &OutputVStream::get_info() const { - return ArgmaxPostProcessElement::create(argmax_op, name, - build_params.elem_stats_flags, build_params.pipeline_status, build_params.buffer_pool_size_edges, build_params.timeout, - build_params.vstream_stats_flags, build_params.shutdown_event, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->get_info(); } -ArgmaxPostProcessElement::ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_argmax_op(argmax_op) -{} - -Expected ArgmaxPostProcessElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +const std::vector &OutputVStream::get_quant_infos() const { - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "ArgmaxPostProcessElement {} does not support run_pull operation", name()); - return FilterElement::run_pull(std::move(optional), source); + return m_vstream->get_quant_infos(); } -hailo_status ArgmaxPostProcessElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +const hailo_format_t &OutputVStream::get_user_buffer_format() const { - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "ArgmaxPostProcessElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); + return m_vstream->get_user_buffer_format(); } -PipelinePad &ArgmaxPostProcessElement::next_pad() +std::string OutputVStream::name() const { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + return m_vstream->name(); } -std::string ArgmaxPostProcessElement::description() const +std::string OutputVStream::network_name() const { - std::stringstream element_description; - element_description << "(" << this->name() << " | " << m_argmax_op->metadata()->get_op_description() << ")"; - return element_description.str(); + return m_vstream->network_name(); } -Expected ArgmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +const std::map &OutputVStream::get_fps_accumulators() const { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } - - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); - } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + return m_vstream->get_fps_accumulators(); +} - std::map inputs; - std::map outputs; - auto 
&input_name = m_argmax_op->inputs_metadata().begin()->first; - auto &output_name = m_argmax_op->outputs_metadata().begin()->first; - inputs.insert({input_name, input.as_view()}); - outputs.insert({output_name, buffer->as_view()}); - m_duration_collector.start_measurement(); - auto post_process_result = m_argmax_op->execute(inputs, outputs); - m_duration_collector.complete_measurement(); +const std::map &OutputVStream::get_latency_accumulators() const +{ + return m_vstream->get_latency_accumulators(); +} - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(post_process_result); - buffer->set_action_status(post_process_result); +const std::map> &OutputVStream::get_queue_size_accumulators() const +{ + return m_vstream->get_queue_size_accumulators(); +} - CHECK_SUCCESS_AS_EXPECTED(post_process_result); +AccumulatorPtr OutputVStream::get_pipeline_latency_accumulator() const +{ + return m_vstream->get_pipeline_latency_accumulator(); +} - return buffer.release(); +const std::vector> &OutputVStream::get_pipeline() const +{ + return m_vstream->get_pipeline(); } -Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, - const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, - std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status OutputVStream::start_vstream() { - auto out_metadata = softmax_op->outputs_metadata().begin()->second; - auto buffer_size = HailoRTCommon::get_frame_size(out_metadata.shape, out_metadata.format); - auto buffer_pool = BufferPool::create(buffer_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool for {}", name); + return m_vstream->start_vstream(); +} - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - auto softmax_elem_ptr = make_shared_nothrow(softmax_op, - name, duration_collector.release(), std::move(pipeline_status), timeout, buffer_pool.release(), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != softmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - LOGGER__INFO("Created {}", softmax_elem_ptr->name()); - return softmax_elem_ptr; +hailo_status OutputVStream::stop_vstream() +{ + return m_vstream->stop_vstream(); } -Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status OutputVStream::stop_and_clear() { - return SoftmaxPostProcessElement::create(softmax_op, name, build_params.elem_stats_flags, build_params.pipeline_status, build_params.buffer_pool_size_edges, - build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->stop_and_clear(); } -SoftmaxPostProcessElement::SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, 
buffer_pool, timeout, async_pipeline), - m_softmax_op(softmax_op) -{} +std::string OutputVStream::get_pipeline_description() const +{ + return m_vstream->get_pipeline_description(); +} -Expected SoftmaxPostProcessElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +bool OutputVStream::is_aborted() { - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "SoftmaxPostProcessElement {} does not support run_pull operation", name()); - return FilterElement::run_pull(std::move(optional), source); + return m_vstream->is_aborted(); } -hailo_status SoftmaxPostProcessElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +hailo_status OutputVStream::before_fork() { - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "SoftmaxPostProcessElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); + return m_vstream->before_fork(); } -PipelinePad &SoftmaxPostProcessElement::next_pad() +hailo_status OutputVStream::after_fork_in_parent() { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + return m_vstream->after_fork_in_parent(); } -std::string SoftmaxPostProcessElement::description() const +hailo_status OutputVStream::after_fork_in_child() { - std::stringstream element_description; - element_description << "(" << this->name() << " | " << m_softmax_op->metadata()->get_op_description() << ")"; - return element_description.str(); + return m_vstream->after_fork_in_child(); } -Expected SoftmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status OutputVStream::set_nms_score_threshold(float32_t threshold) { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } + return m_vstream->set_nms_score_threshold(threshold); +} - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); - } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); +hailo_status OutputVStream::set_nms_iou_threshold(float32_t threshold) +{ + return m_vstream->set_nms_iou_threshold(threshold); +} - std::map inputs; - std::map outputs; - auto &input_name = m_softmax_op->inputs_metadata().begin()->first; - auto &output_name = m_softmax_op->outputs_metadata().begin()->first; - inputs.insert({input_name, input.as_view()}); - outputs.insert({output_name, buffer->as_view()}); - m_duration_collector.start_measurement(); - auto post_process_result = m_softmax_op->execute(inputs, outputs); - m_duration_collector.complete_measurement(); +hailo_status OutputVStream::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) +{ + return m_vstream->set_nms_max_proposals_per_class(max_proposals_per_class); +} - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(post_process_result); - buffer->set_action_status(post_process_result); +hailo_status OutputVStream::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) +{ + return m_vstream->set_nms_max_accumulated_mask_size(max_accumulated_mask_size); +} - CHECK_SUCCESS_AS_EXPECTED(post_process_result); +OutputVStream::OutputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} - return buffer.release(); -} +std::map get_pipeline_accumulators_by_type( + const std::vector> &pipeline, AccumulatorType accumulator_type) +{ + std::map result; + 
for (const auto &elem : pipeline) { + if (nullptr == elem) { + continue; + } -BaseVStream::BaseVStream(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, - EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, - hailo_status &output_status) : - m_vstream_info(vstream_info), - m_quant_infos(quant_infos), - m_vstream_params(vstream_params), - m_measure_pipeline_latency((vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_LATENCY) != 0), - m_entry_element(pipeline_entry), - m_pipeline(std::move(pipeline)), - m_is_activated(false), - m_is_aborted(false), - m_pipeline_status(std::move(pipeline_status)), - m_shutdown_event(shutdown_event), - m_core_op_activated_event(std::move(core_op_activated_event)), - m_fps_accumulators(get_pipeline_accumulators_by_type(m_pipeline, AccumulatorType::FPS)), - m_latency_accumulators(get_pipeline_accumulators_by_type(m_pipeline, AccumulatorType::LATENCY)), - m_queue_size_accumulators(get_pipeline_queue_size_accumulators(m_pipeline)), - m_pipeline_latency_accumulator(pipeline_latency_accumulator) -{ - output_status = start_vstream(); -} - -BaseVStream::BaseVStream(BaseVStream &&other) noexcept : - m_vstream_info(std::move(other.m_vstream_info)), - m_vstream_params(std::move(other.m_vstream_params)), - m_measure_pipeline_latency(std::move(other.m_measure_pipeline_latency)), - m_entry_element(std::move(other.m_entry_element)), - m_pipeline(std::move(other.m_pipeline)), - m_is_activated(std::exchange(other.m_is_activated, false)), - m_is_aborted(std::exchange(other.m_is_aborted, false)), - m_pipeline_status(std::move(other.m_pipeline_status)), - m_shutdown_event(std::move(other.m_shutdown_event)), - m_core_op_activated_event(std::move(other.m_core_op_activated_event)), - m_fps_accumulators(std::move(other.m_fps_accumulators)), - m_latency_accumulators(std::move(other.m_latency_accumulators)), - m_queue_size_accumulators(std::move(other.m_queue_size_accumulators)), - m_pipeline_latency_accumulator(std::move(other.m_pipeline_latency_accumulator)) -{} - -BaseVStream& BaseVStream::operator=(BaseVStream &&other) noexcept -{ - if (this != &other) { - // operator= is used only for vstream creation BEFORE activation. 
otherwise we should deactivate vstream here - assert(!m_is_activated); - m_vstream_info = std::move(other.m_vstream_info); - m_quant_infos = std::move(other.m_quant_infos); - m_vstream_params = std::move(other.m_vstream_params); - m_measure_pipeline_latency = std::move(other.m_measure_pipeline_latency); - m_entry_element = std::move(other.m_entry_element); - m_pipeline = std::move(other.m_pipeline); - m_is_activated = std::exchange(other.m_is_activated, false); - m_is_aborted = std::exchange(other.m_is_aborted, false); - m_pipeline_status = std::move(other.m_pipeline_status); - m_shutdown_event = std::move(other.m_shutdown_event); - m_core_op_activated_event = std::move(other.m_core_op_activated_event); - m_fps_accumulators = std::move(other.m_fps_accumulators); - m_latency_accumulators = std::move(other.m_latency_accumulators); - m_queue_size_accumulators = std::move(other.m_queue_size_accumulators); - m_pipeline_latency_accumulator = std::move(other.m_pipeline_latency_accumulator); - } - return *this; -} - -hailo_status BaseVStream::start_vstream() -{ - auto status = m_shutdown_event->reset(); - CHECK_SUCCESS(status); - - status = resume(); - CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed to resume stream in {}", name()); - - LOGGER__DEBUG("Activating {}...", name()); - status = m_entry_element->activate(); - CHECK_SUCCESS(status); - - m_is_activated = true; - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::abort() -{ - auto status = m_entry_element->abort(); - CHECK_SUCCESS(status); - m_is_aborted = true; - - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::resume() -{ - auto status = m_entry_element->clear_abort(); - CHECK_SUCCESS(status); - m_is_aborted = false; - - if (m_is_activated) { - status = m_entry_element->activate(); - CHECK_SUCCESS(status); - } - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::stop_vstream() -{ - hailo_status status = HAILO_SUCCESS; - if (m_is_activated) { - m_is_activated = false; - status = m_entry_element->deactivate(); - if (HAILO_SUCCESS != status) { - LOGGER__WARNING("Failed deactivate of vstream {} status {}", name(), status); - } - - // If VStream was aborted, do not clear low-level stream abortion, - // otherwise flush would be called on low-level stream d-tor when there is no receiver. 
- auto should_clear_abort = (!m_is_aborted); - status = m_entry_element->post_deactivate(should_clear_abort); - if (HAILO_SUCCESS != status) { - LOGGER__WARNING("Failed post deactivate of vstream {} status {}", name(), status); - } - } - return status; -} - -hailo_status BaseVStream::stop_and_clear() -{ - auto status = HAILO_SUCCESS; - if (nullptr != m_core_op_activated_event) { - status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION, - "Trying to clear {} vstream before its network group is deactivated", name()); - } - - status = stop_vstream(); - CHECK_SUCCESS(status); - - status = m_entry_element->clear(); - CHECK_SUCCESS(status, "Failed clearing vstream {}", name()); - - const auto curr_pipeline_status = m_pipeline_status->load(); - if (HAILO_SUCCESS != curr_pipeline_status) { - LOGGER__TRACE("Overwriting current pipeline status {}", curr_pipeline_status); - m_pipeline_status->store(HAILO_SUCCESS); - } - - return status; -} - -hailo_status BaseVStream::before_fork() -{ - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::after_fork_in_parent() -{ - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::after_fork_in_child() -{ - return HAILO_SUCCESS; -} - -size_t BaseVStream::get_frame_size() const -{ - return HailoRTCommon::get_frame_size(m_vstream_info, m_vstream_params.user_buffer_format); -} - -const hailo_vstream_info_t &BaseVStream::get_info() const -{ - return m_vstream_info; -} - -const std::vector<hailo_quant_info_t> &BaseVStream::get_quant_infos() const -{ - return m_quant_infos; -} - -const hailo_format_t &BaseVStream::get_user_buffer_format() const -{ - return m_vstream_params.user_buffer_format; -} - -std::string BaseVStream::name() const -{ - return std::string(m_vstream_info.name); -} - -std::string BaseVStream::network_name() const -{ - return std::string(m_vstream_info.network_name); -} - -const std::map<std::string, AccumulatorPtr> &BaseVStream::get_fps_accumulators() const -{ - return m_fps_accumulators; -} - -const std::map<std::string, AccumulatorPtr> &BaseVStream::get_latency_accumulators() const -{ - return m_latency_accumulators; -} - -const std::map<std::string, std::vector<AccumulatorPtr>> &BaseVStream::get_queue_size_accumulators() const -{ - return m_queue_size_accumulators; -} - -AccumulatorPtr BaseVStream::get_pipeline_latency_accumulator() const -{ - return m_pipeline_latency_accumulator; -} - - -const std::vector<std::shared_ptr<PipelineElement>> &BaseVStream::get_pipeline() const -{ - return m_pipeline; -} - -Expected<InputVStream> InputVStream::create(const hailo_vstream_info_t &vstream_info, const std::vector<hailo_quant_info_t> &quant_infos, - const hailo_vstream_params_t &vstream_params, std::shared_ptr<PipelineElement> pipeline_entry, - std::shared_ptr<SinkElement> pipeline_exit, std::vector<std::shared_ptr<PipelineElement>> &&pipeline, - std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, - AccumulatorPtr pipeline_latency_accumulator) -{ - auto vstream_internal = InputVStreamInternal::create(vstream_info, quant_infos, vstream_params, pipeline_entry, pipeline_exit, - std::move(pipeline), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator); - CHECK_EXPECTED(vstream_internal); - - InputVStream vstream(vstream_internal.release()); - return vstream; -} - -hailo_status InputVStream::write(const MemoryView &buffer) -{ - return m_vstream->write(std::move(buffer)); -}
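// The pix-buffer overload below has three paths: a single plane is forwarded as
// a plain MemoryView, a multi-planar model receives the planes untouched, and
// anything else is flattened into one contiguous allocation. A self-contained
// sketch of the contiguity check and gather-copy; PixBuffer and Plane are
// simplified stand-ins for the hailo_pix_buffer_t layout, not the real structs:
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

namespace sketch_pix_buffer {

struct Plane { void *data; uint32_t bytes_used; };
struct PixBuffer { std::vector<Plane> planes; };

// Returns true when every plane ends exactly where the next one begins,
// i.e. the planes already form one contiguous memory block.
bool planes_are_contiguous(const PixBuffer &buf)
{
    for (std::size_t i = 0; i + 1 < buf.planes.size(); i++) {
        const auto *end = static_cast<const uint8_t *>(buf.planes[i].data) + buf.planes[i].bytes_used;
        if (end != static_cast<const uint8_t *>(buf.planes[i + 1].data)) {
            return false;
        }
    }
    return true;
}

// Gathers all planes into one freshly allocated contiguous buffer.
std::vector<uint8_t> gather_planes(const PixBuffer &buf)
{
    std::size_t total = 0;
    for (const auto &plane : buf.planes) {
        total += plane.bytes_used;
    }
    std::vector<uint8_t> contiguous(total);
    std::size_t offset = 0;
    for (const auto &plane : buf.planes) {
        std::memcpy(contiguous.data() + offset, plane.data, plane.bytes_used);
        offset += plane.bytes_used;
    }
    return contiguous;
}

} // namespace sketch_pix_buffer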
-hailo_status InputVStream::write(const hailo_pix_buffer_t &buffer) -{ - // If only one plane is passed, address it as a MemoryView - if (1 == buffer.number_of_planes) { - return write(MemoryView(buffer.planes[0].user_ptr, buffer.planes[0].bytes_used)); - } - - // If the model is multi-planar, pass the pix buffer as-is - if (m_vstream->is_multi_planar()){ - return m_vstream->write(buffer); - } - - // Other cases - allocate a contiguous buffer to hold all planes - bool is_contiguous = true; - uint32_t planes_total_size = 0; - /* assuming contiguous memory. If not, this will be overridden by the coming loop */ - void *data_ptr = buffer.planes[0].user_ptr; - - /* calculate total data size by summing the planes' sizes and check if the planes are contiguous */ - for (uint32_t plane_index = 0; plane_index < buffer.number_of_planes; plane_index++){ - auto &plane = buffer.planes[plane_index]; - planes_total_size += plane.bytes_used; - - if (is_contiguous && (plane_index + 1 < buffer.number_of_planes)){ - auto &next_plane = buffer.planes[plane_index+1]; - if ((static_cast<uint8_t*>(plane.user_ptr) + plane.bytes_used) != next_plane.user_ptr){ - is_contiguous = false; - } - } - } - - BufferPtr contiguous_buffer = nullptr; - if (! is_contiguous) { - /* copy to a contiguous buffer, and then pass it */ - auto expected_buffer = Buffer::create_shared(planes_total_size); - CHECK_EXPECTED_AS_STATUS(expected_buffer); - contiguous_buffer = expected_buffer.release(); - uint32_t copied_bytes = 0; - - for (uint32_t plane_index = 0; plane_index < buffer.number_of_planes; plane_index++){ - auto &plane = buffer.planes[plane_index]; - std::memcpy(contiguous_buffer->data() + copied_bytes, plane.user_ptr, plane.bytes_used); - copied_bytes += plane.bytes_used; - } - - data_ptr = contiguous_buffer->data(); - } - - return m_vstream->write(std::move(MemoryView(data_ptr, planes_total_size))); -} - -hailo_status InputVStream::flush() -{ - return m_vstream->flush(); -} - -hailo_status InputVStream::clear(std::vector<InputVStream> &vstreams) -{ - for (auto &vstream : vstreams) { - auto status = vstream.stop_and_clear(); - CHECK_SUCCESS(status); - } - for (auto &vstream : vstreams) { - auto status = vstream.start_vstream(); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status InputVStream::clear(std::vector<std::reference_wrapper<InputVStream>> &vstreams) -{ - for (auto &vstream : vstreams) { - auto status = vstream.get().stop_and_clear(); - CHECK_SUCCESS(status); - } - for (auto &vstream : vstreams) { - auto status = vstream.get().start_vstream(); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status InputVStream::abort() -{ - return m_vstream->abort(); -} - -hailo_status InputVStream::resume() -{ - return m_vstream->resume(); -} - -size_t InputVStream::get_frame_size() const -{ - return m_vstream->get_frame_size(); -} - -const hailo_vstream_info_t &InputVStream::get_info() const -{ - return m_vstream->get_info(); -} - -const std::vector<hailo_quant_info_t> &InputVStream::get_quant_infos() const -{ - return m_vstream->get_quant_infos(); -} - -const hailo_format_t &InputVStream::get_user_buffer_format() const -{ - return m_vstream->get_user_buffer_format(); -} - -std::string InputVStream::name() const -{ - return m_vstream->name(); -} - -std::string InputVStream::network_name() const -{ - return m_vstream->network_name(); -} - -const std::map<std::string, AccumulatorPtr> &InputVStream::get_fps_accumulators() const -{ - return m_vstream->get_fps_accumulators(); -} - -const std::map<std::string, AccumulatorPtr> &InputVStream::get_latency_accumulators() const -{ - return m_vstream->get_latency_accumulators(); -} - -const std::map<std::string, std::vector<AccumulatorPtr>> &InputVStream::get_queue_size_accumulators() const -{ - return m_vstream->get_queue_size_accumulators(); -} - -AccumulatorPtr InputVStream::get_pipeline_latency_accumulator() const -{ - return m_vstream->get_pipeline_latency_accumulator(); -} - -const std::vector<std::shared_ptr<PipelineElement>> 
&InputVStream::get_pipeline() const -{ - return m_vstream->get_pipeline(); -} - -hailo_status InputVStream::start_vstream() -{ - return m_vstream->start_vstream(); -} - -hailo_status InputVStream::stop_vstream() -{ - return m_vstream->stop_vstream(); -} - -hailo_status InputVStream::stop_and_clear() -{ - return m_vstream->stop_and_clear(); -} - -std::string InputVStream::get_pipeline_description() const -{ - return m_vstream->get_pipeline_description(); -} - -bool InputVStream::is_aborted() -{ - return m_vstream->is_aborted(); -} - -bool InputVStream::is_multi_planar() -{ - return m_vstream->is_multi_planar(); -} - - -hailo_status InputVStream::before_fork() -{ - return m_vstream->before_fork(); -} - -hailo_status InputVStream::after_fork_in_parent() -{ - return m_vstream->after_fork_in_parent(); -} - -hailo_status InputVStream::after_fork_in_child() -{ - return m_vstream->after_fork_in_child(); -} - -InputVStream::InputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} - -Expected OutputVStream::create( - const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator) -{ - auto vstream_internal = OutputVStreamInternal::create(vstream_info, quant_infos, vstream_params, pipeline_entry, - std::move(pipeline), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator); - CHECK_EXPECTED(vstream_internal); - - OutputVStream vstream(vstream_internal.release()); - return vstream; -} - -hailo_status OutputVStream::read(MemoryView buffer) -{ - return m_vstream->read(std::move(buffer)); -} - -hailo_status OutputVStream::clear(std::vector &vstreams) -{ - for (auto &vstream : vstreams) { - auto status = vstream.stop_and_clear(); - CHECK_SUCCESS(status); - } - for (auto &vstream : vstreams) { - auto status = vstream.start_vstream(); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status OutputVStream::abort() -{ - return m_vstream->abort(); -} - -hailo_status OutputVStream::resume() -{ - return m_vstream->resume(); -} - -hailo_status OutputVStream::clear(std::vector> &vstreams) -{ - for (auto &vstream : vstreams) { - auto status = vstream.get().stop_and_clear(); - CHECK_SUCCESS(status); - } - for (auto &vstream : vstreams) { - auto status = vstream.get().start_vstream(); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -size_t OutputVStream::get_frame_size() const -{ - return m_vstream->get_frame_size(); -} - -const hailo_vstream_info_t &OutputVStream::get_info() const -{ - return m_vstream->get_info(); -} - -const std::vector &OutputVStream::get_quant_infos() const -{ - return m_vstream->get_quant_infos(); -} - -const hailo_format_t &OutputVStream::get_user_buffer_format() const -{ - return m_vstream->get_user_buffer_format(); -} - -std::string OutputVStream::name() const -{ - return m_vstream->name(); -} - -std::string OutputVStream::network_name() const -{ - return m_vstream->network_name(); -} - -const std::map &OutputVStream::get_fps_accumulators() const -{ - return m_vstream->get_fps_accumulators(); -} - -const std::map &OutputVStream::get_latency_accumulators() const -{ - return m_vstream->get_latency_accumulators(); -} - -const std::map> &OutputVStream::get_queue_size_accumulators() const -{ - return m_vstream->get_queue_size_accumulators(); -} - 
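// The clear() overloads above quiesce *every* vstream before restarting any of
// them, so no producer can feed a half-cleared pipeline. A self-contained
// sketch of that two-phase pattern; Stream and its methods are hypothetical
// stand-ins, not the HailoRT types:
#include <vector>

namespace sketch_two_phase_clear {

enum class Status { SUCCESS, FAILURE };

struct Stream {
    Status stop_and_clear() { return Status::SUCCESS; }
    Status start() { return Status::SUCCESS; }
};

Status clear_all(std::vector<Stream> &streams)
{
    // Phase 1: stop and drain everything first.
    for (auto &s : streams) {
        if (Status::SUCCESS != s.stop_and_clear()) {
            return Status::FAILURE;
        }
    }
    // Phase 2: only then bring the whole group back up.
    for (auto &s : streams) {
        if (Status::SUCCESS != s.start()) {
            return Status::FAILURE;
        }
    }
    return Status::SUCCESS;
}

} // namespace sketch_two_phase_clear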
-AccumulatorPtr OutputVStream::get_pipeline_latency_accumulator() const -{ - return m_vstream->get_pipeline_latency_accumulator(); -} - -const std::vector> &OutputVStream::get_pipeline() const -{ - return m_vstream->get_pipeline(); -} - -hailo_status OutputVStream::start_vstream() -{ - return m_vstream->start_vstream(); -} - -hailo_status OutputVStream::stop_vstream() -{ - return m_vstream->stop_vstream(); -} - -hailo_status OutputVStream::stop_and_clear() -{ - return m_vstream->stop_and_clear(); -} - -std::string OutputVStream::get_pipeline_description() const -{ - return m_vstream->get_pipeline_description(); -} - -bool OutputVStream::is_aborted() -{ - return m_vstream->is_aborted(); -} - -hailo_status OutputVStream::before_fork() -{ - return m_vstream->before_fork(); -} - -hailo_status OutputVStream::after_fork_in_parent() -{ - return m_vstream->after_fork_in_parent(); -} - -hailo_status OutputVStream::after_fork_in_child() -{ - return m_vstream->after_fork_in_child(); -} - -hailo_status OutputVStream::set_nms_score_threshold(float32_t threshold) -{ - return m_vstream->set_nms_score_threshold(threshold); -} - -hailo_status OutputVStream::set_nms_iou_threshold(float32_t threshold) -{ - return m_vstream->set_nms_iou_threshold(threshold); -} - -hailo_status OutputVStream::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) -{ - return m_vstream->set_nms_max_proposals_per_class(max_proposals_per_class); -} - -OutputVStream::OutputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} - -std::map get_pipeline_accumulators_by_type( - const std::vector> &pipeline, AccumulatorType accumulator_type) -{ - std::map result; - for (const auto &elem : pipeline) { - if (nullptr == elem) { - continue; - } - - AccumulatorPtr accumulator = nullptr; - if (AccumulatorType::FPS == accumulator_type) { - accumulator = elem->get_fps_accumulator(); - } else if (AccumulatorType::LATENCY == accumulator_type) { - accumulator = elem->get_latency_accumulator(); - } else { - continue; - } - - if (nullptr != accumulator) { - result.emplace(elem->name(), accumulator); - } - } - - return result; -} - -std::map> get_pipeline_queue_size_accumulators( - const std::vector> &pipeline) -{ - std::map> result; - for (const auto &elem : pipeline) { - if (nullptr == elem) { - continue; - } - - const auto accumulators = elem->get_queue_size_accumulators(); - if (0 != accumulators.size()) { - result.emplace(elem->name(), accumulators); - } - } - - return result; -} - -Expected> InputVStreamInternal::create(const hailo_vstream_info_t &vstream_info, - const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, - std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, - AccumulatorPtr pipeline_latency_accumulator) -{ - auto vstream = InputVStreamImpl::create(vstream_info, quant_infos, vstream_params, pipeline_entry, pipeline_exit, - std::move(pipeline), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator); - CHECK_EXPECTED(vstream); - auto vstream_ptr = std::shared_ptr(vstream.release()); - return vstream_ptr; -} - -InputVStreamInternal::InputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, - EventPtr shutdown_event, AccumulatorPtr 
pipeline_latency_accumulator, EventPtr &&core_op_activated_event, - hailo_status &output_status) : - BaseVStream(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status){} - -Expected> InputVStreamImpl::create(const hailo_vstream_info_t &vstream_info, - const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, - std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, - AccumulatorPtr pipeline_latency_accumulator) -{ - hailo_status status = HAILO_UNINITIALIZED; - - if (nullptr != pipeline_latency_accumulator) { - if (pipeline_exit) { - pipeline_exit->sink().set_push_complete_callback([pipeline_latency_accumulator](const PipelineBuffer::Metadata& metadata) { - const auto duration_sec = std::chrono::duration_cast>( - std::chrono::steady_clock::now() - metadata.get_start_time()).count(); - pipeline_latency_accumulator->add_data_point(duration_sec); - }); - } - } - - auto vstream_ptr = std::shared_ptr(new InputVStreamImpl(vstream_info, quant_infos, vstream_params, std::move(pipeline_entry), std::move(pipeline), - std::move(pipeline_status), shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), status)); - CHECK_SUCCESS_AS_EXPECTED(status, "Failed to create virtual stream"); - - return vstream_ptr; -} - -InputVStreamImpl::InputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, - EventPtr core_op_activated_event, hailo_status &output_status) : - InputVStreamInternal(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) -{ - // TODO: propagate a flag instead of using dynamic_pointer_cast (will be disabled when we'll disable RTTI) - m_is_multi_planar = (nullptr != std::dynamic_pointer_cast(pipeline_entry)); - - if (HAILO_SUCCESS != output_status) { - return; - } - - LOGGER__INFO("Creating {}...", name()); -} - -InputVStreamImpl::~InputVStreamImpl() -{ - (void)stop_vstream(); -} - -hailo_status InputVStreamImpl::write(const MemoryView &buffer) -{ - if (nullptr != m_core_op_activated_event) { - CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "Failed to write buffer! 
Virtual stream {} is not activated!", name()); - auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - CHECK(HAILO_TIMEOUT != status, HAILO_NETWORK_GROUP_NOT_ACTIVATED, - "Trying to write to vstream {} before its network group is activated", name()); - } - - assert(1 == m_entry_element->sinks().size()); - auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(buffer, false, nullptr, m_measure_pipeline_latency)); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Sending to VStream was shutdown!"); - status = m_pipeline_status->load(); - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Sending to VStream was aborted!"); - return HAILO_STREAM_ABORTED_BY_USER; - } - return status; -} - -hailo_status InputVStreamImpl::write(const hailo_pix_buffer_t &buffer) -{ - if (nullptr != m_core_op_activated_event) { - CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "Failed to write buffer! Virtual stream {} is not activated!", name()); - auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - CHECK(HAILO_TIMEOUT != status, HAILO_NETWORK_GROUP_NOT_ACTIVATED, - "Trying to write to vstream {} before its network group is activated", name()); - } - - assert(1 == m_entry_element->sinks().size()); - auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(buffer)); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Sending to VStream was shutdown!"); - status = m_pipeline_status->load(); - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Sending to VStream was aborted!"); - return HAILO_STREAM_ABORTED_BY_USER; - } - return status; -} - -hailo_status InputVStreamImpl::flush() -{ - assert(1 == m_entry_element->sinks().size()); - auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(PipelineBuffer::Type::FLUSH)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Sending to VStream was aborted!"); - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK_SUCCESS(status); - - status = m_entry_element->flush(); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -bool InputVStreamImpl::is_multi_planar() const -{ - return m_is_multi_planar; -} - -#ifdef HAILO_SUPPORT_MULTI_PROCESS -Expected> InputVStreamClient::create(VStreamIdentifier &&identifier) -{ - grpc::ChannelArguments ch_args; - ch_args.SetMaxReceiveMessageSize(-1); - auto channel = grpc::CreateCustomChannel(hailort::HAILORT_SERVICE_ADDRESS, grpc::InsecureChannelCredentials(), ch_args); - CHECK_AS_EXPECTED(channel != nullptr, HAILO_INTERNAL_FAILURE); - - auto client = make_unique_nothrow(channel); - CHECK_AS_EXPECTED(client != nullptr, HAILO_OUT_OF_HOST_MEMORY); - - auto user_buffer_format = client->InputVStream_get_user_buffer_format(identifier); - CHECK_EXPECTED(user_buffer_format); - - auto vstream_info = client->InputVStream_get_info(identifier); - CHECK_EXPECTED(vstream_info); - - return std::shared_ptr(new InputVStreamClient(std::move(client), std::move(identifier), - user_buffer_format.release(), vstream_info.release())); -} - -InputVStreamClient::InputVStreamClient(std::unique_ptr client, VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format, - hailo_vstream_info_t &&info) : - m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info) {} - -InputVStreamClient::~InputVStreamClient() -{ - auto reply = m_client->InputVStream_release(m_identifier, OsUtils::get_curr_pid()); - if (reply != HAILO_SUCCESS) { - 
LOGGER__CRITICAL("InputVStream_release failed!"); - } -} - -hailo_status InputVStreamClient::write(const MemoryView &buffer) -{ - return m_client->InputVStream_write(m_identifier, buffer); -} - -hailo_status InputVStreamClient::write(const hailo_pix_buffer_t &buffer) -{ - return m_client->InputVStream_write(m_identifier, buffer); -} - -hailo_status InputVStreamClient::flush() -{ - return m_client->InputVStream_flush(m_identifier); -} - -bool InputVStreamClient::is_multi_planar() const -{ - auto is_multi_planar_exp = m_client->InputVStream_is_multi_planar(m_identifier); - if (!is_multi_planar_exp) { - LOGGER__CRITICAL("InputVStream_is_multi_planar failed with status={}", is_multi_planar_exp.status()); - return true; - } - return is_multi_planar_exp.release(); -} - -hailo_status InputVStreamClient::abort() -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto abort_client = expected_client.release(); - return abort_client->InputVStream_abort(m_identifier); -} - -hailo_status InputVStreamClient::resume() -{ - return m_client->InputVStream_resume(m_identifier); -} - -hailo_status InputVStreamClient::stop_and_clear() -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto stop_and_clear_client = expected_client.release(); - - return stop_and_clear_client->InputVStream_stop_and_clear(m_identifier); -} - -hailo_status InputVStreamClient::start_vstream() -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto start_vstream_client = expected_client.release(); - - return start_vstream_client->InputVStream_start_vstream(m_identifier); -} - -size_t InputVStreamClient::get_frame_size() const -{ - auto frame_size = m_client->InputVStream_get_frame_size(m_identifier); - if (!frame_size) { - LOGGER__CRITICAL("InputVStream_get_frame_size failed with status={}", frame_size.status()); - return 0; - } - return frame_size.release(); -} - -const hailo_vstream_info_t &InputVStreamClient::get_info() const -{ - return m_info; -} - -const hailo_format_t &InputVStreamClient::get_user_buffer_format() const -{ - return m_user_buffer_format; -} - -std::string InputVStreamClient::name() const -{ - auto expected_name = m_client->InputVStream_name(m_identifier); - if (!expected_name) { - LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); - return ""; - } - return expected_name.release(); -} + AccumulatorPtr accumulator = nullptr; + if (AccumulatorType::FPS == accumulator_type) { + accumulator = elem->get_fps_accumulator(); + } else if (AccumulatorType::LATENCY == accumulator_type) { + accumulator = elem->get_latency_accumulator(); + } else { + continue; + } -std::string InputVStreamClient::network_name() const -{ - auto expected_name = m_client->InputVStream_network_name(m_identifier); - if (!expected_name) { - LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); - return ""; + if (nullptr != accumulator) { + result.emplace(elem->name(), accumulator); + } } - return expected_name.release(); -} - -const std::map &InputVStreamClient::get_fps_accumulators() const -{ - LOGGER__ERROR("InputVStream::get_fps_accumulators function is not supported when using multi-process service"); - return m_fps_accumulators; -} -const std::map &InputVStreamClient::get_latency_accumulators() const -{ - LOGGER__ERROR("InputVStream::get_latency_accumulators function is not supported when 
using multi-process service"); - return m_latency_accumulators; -} - -const std::map> &InputVStreamClient::get_queue_size_accumulators() const -{ - LOGGER__ERROR("InputVStream::get_queue_size_accumulators function is not supported when using multi-process service"); - return m_queue_size_accumulators; -} -AccumulatorPtr InputVStreamClient::get_pipeline_latency_accumulator() const -{ - LOGGER__ERROR("InputVStream::get_pipeline_latency_accumulator function is not supported when using multi-process service"); - return m_pipeline_latency_accumulator; -} -const std::vector> &InputVStreamClient::get_pipeline() const -{ - LOGGER__ERROR("InputVStream::get_pipeline function is not supported when using multi-process service"); - return m_pipeline; -} - -hailo_status InputVStreamClient::create_client() -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - m_client = expected_client.release(); - return HAILO_SUCCESS; -} - -hailo_status InputVStreamClient::before_fork() -{ - m_client.reset(); - return HAILO_SUCCESS; -} -hailo_status InputVStreamClient::after_fork_in_parent() -{ - return create_client(); + return result; } -hailo_status InputVStreamClient::after_fork_in_child() +std::map> get_pipeline_queue_size_accumulators( + const std::vector> &pipeline) { - return create_client(); -} + std::map> result; + for (const auto &elem : pipeline) { + if (nullptr == elem) { + continue; + } -bool InputVStreamClient::is_aborted() -{ - auto is_aborted_exp = m_client->InputVStream_is_aborted(m_identifier); - if (!is_aborted_exp) { - LOGGER__CRITICAL("InputVStream_is_aborted failed with status={}", is_aborted_exp.status()); - return true; + const auto accumulators = elem->get_queue_size_accumulators(); + if (0 != accumulators.size()) { + result.emplace(elem->name(), accumulators); + } } - return is_aborted_exp.release(); -} - -#endif // HAILO_SUPPORT_MULTI_PROCESS -std::string InputVStreamInternal::get_pipeline_description() const -{ - std::stringstream pipeline_str; - pipeline_str << "Input pipeline '" << name() << "': "; - for (const auto &element : m_pipeline) { - pipeline_str << element->description() << " >> "; - } - pipeline_str << "HW"; - return pipeline_str.str(); + return result; } -Expected> OutputVStreamInternal::create( - const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator) +Expected> InputVStreamInternal::create(const hailo_vstream_info_t &vstream_info, + const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, + std::shared_ptr pipeline_exit, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, + AccumulatorPtr pipeline_latency_accumulator) { - auto vstream = OutputVStreamImpl::create(vstream_info, quant_infos, vstream_params, pipeline_entry, - std::move(pipeline), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator); + auto vstream = InputVStreamImpl::create(vstream_info, quant_infos, vstream_params, pipeline_entry, pipeline_exit, + std::move(pipeline), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator); CHECK_EXPECTED(vstream); - auto vstream_ptr = std::shared_ptr(vstream.release()); + auto 
vstream_ptr = std::shared_ptr(vstream.release()); return vstream_ptr; } -OutputVStreamInternal::OutputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, - std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - AccumulatorPtr pipeline_latency_accumulator, - EventPtr core_op_activated_event, hailo_status &output_status) : +InputVStreamInternal::InputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, + const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, + hailo_status &output_status) : BaseVStream(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) -{ - // Reversing the order of pipeline-elements, for the destruction flow to work in the right order (from user-side to hw-side) - std::reverse(m_pipeline.begin(), m_pipeline.end()); -} + pipeline_latency_accumulator, std::move(core_op_activated_event), output_status){} -Expected> OutputVStreamImpl::create(const hailo_vstream_info_t &vstream_info, - const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator) +Expected> InputVStreamImpl::create(const hailo_vstream_info_t &vstream_info, + const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, + std::shared_ptr pipeline_exit, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, + AccumulatorPtr pipeline_latency_accumulator) { hailo_status status = HAILO_UNINITIALIZED; - CHECK_AS_EXPECTED(1 == pipeline_entry->sources().size(), HAILO_INVALID_ARGUMENT, - "OutputVStream's entry element is expected to have one source"); - if (nullptr != pipeline_latency_accumulator) { - pipeline_entry->sources()[0].set_pull_complete_callback([pipeline_latency_accumulator](const PipelineBuffer::Metadata& metadata) { - const auto duration_sec = std::chrono::duration_cast>( - std::chrono::steady_clock::now() - metadata.get_start_time()).count(); - pipeline_latency_accumulator->add_data_point(duration_sec); - }); + if (pipeline_exit) { + pipeline_exit->sink().set_push_complete_callback([pipeline_latency_accumulator](const PipelineBuffer::Metadata& metadata) { + const auto duration_sec = std::chrono::duration_cast>( + std::chrono::steady_clock::now() - metadata.get_start_time()).count(); + pipeline_latency_accumulator->add_data_point(duration_sec); + }); + } } - auto vstream_ptr = std::shared_ptr(new OutputVStreamImpl(vstream_info, quant_infos, vstream_params, std::move(pipeline_entry), std::move(pipeline), - std::move(pipeline_status), shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), status)); + auto vstream_ptr = std::shared_ptr(new InputVStreamImpl(vstream_info, quant_infos, vstream_params, std::move(pipeline_entry), std::move(pipeline), + std::move(pipeline_status), pipeline_latency_accumulator, std::move(core_op_activated_event), status)); 
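// The push-complete callback registered above timestamps each buffer at
// pipeline entry and records (now - start) when the exit pad finishes pushing
// it, yielding one end-to-end latency sample per frame. A standalone sketch of
// that hook; Metadata and LatencyAccumulator are illustrative stand-ins:
#include <chrono>
#include <functional>
#include <memory>
#include <vector>

namespace sketch_latency_hook {

using Clock = std::chrono::steady_clock;

struct Metadata { Clock::time_point start_time; };

struct LatencyAccumulator {
    std::vector<double> samples_sec;
    void add_data_point(double sec) { samples_sec.push_back(sec); }
};

std::function<void(const Metadata &)> make_latency_hook(std::shared_ptr<LatencyAccumulator> acc)
{
    // Capture the accumulator by value so the hook keeps it alive.
    return [acc](const Metadata &metadata) {
        const auto duration_sec = std::chrono::duration_cast<std::chrono::duration<double>>(
            Clock::now() - metadata.start_time).count();
        acc->add_data_point(duration_sec);
    };
}

} // namespace sketch_latency_hook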
CHECK_SUCCESS_AS_EXPECTED(status, "Failed to create virtual stream"); return vstream_ptr; } -std::string OutputVStreamInternal::get_pipeline_description() const +InputVStreamImpl::InputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, + std::shared_ptr pipeline_entry, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, + EventPtr core_op_activated_event, hailo_status &output_status) : + InputVStreamInternal(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), + pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) { - std::stringstream pipeline_str; - pipeline_str << "Output pipeline '" << name() << "': HW"; - for (const auto &element : m_pipeline) { - pipeline_str << " >> " << element->description(); - } - return pipeline_str.str(); -} + // TODO: propagate a flag instead of using dynamic_pointer_cast (will be disabled when we'll disable RTTI) + m_is_multi_planar = (nullptr != std::dynamic_pointer_cast(pipeline_entry)); -OutputVStreamImpl::OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, - const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, - std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - AccumulatorPtr pipeline_latency_accumulator, - EventPtr core_op_activated_event, hailo_status &output_status) : - OutputVStreamInternal(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) -{ if (HAILO_SUCCESS != output_status) { return; } @@ -2187,85 +754,80 @@ OutputVStreamImpl::OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, c LOGGER__INFO("Creating {}...", name()); } -OutputVStreamImpl::~OutputVStreamImpl() +InputVStreamImpl::~InputVStreamImpl() { (void)stop_vstream(); } -hailo_status OutputVStreamImpl::read(MemoryView buffer) +hailo_status InputVStreamImpl::write(const MemoryView &buffer) { if (nullptr != m_core_op_activated_event) { - CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "read() failed! Virtual stream {} is not activated!", name()); + CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "Failed to write buffer! 
Virtual stream {} is not activated!", name()); auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - if (HAILO_TIMEOUT == status) { - LOGGER__INFO("Trying to read from vstream {} before its network_group is activated", name()); - return HAILO_NETWORK_GROUP_NOT_ACTIVATED; - } - CHECK_SUCCESS(status); + CHECK(HAILO_TIMEOUT != status, HAILO_NETWORK_GROUP_NOT_ACTIVATED, + "Trying to write to vstream {} before its network group is activated", name()); } - assert(1 == m_entry_element->sources().size()); - auto recv_buffer = m_entry_element->sources()[0].run_pull(PipelineBuffer(buffer, false, nullptr, m_measure_pipeline_latency)); - auto status = recv_buffer.status(); + assert(1 == m_entry_element->sinks().size()); + auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(buffer, [](hailo_status){}, HAILO_SUCCESS, false, nullptr, m_measure_pipeline_latency)); if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Receiving to VStream was shutdown!"); + LOGGER__INFO("Sending to VStream was shutdown!"); status = m_pipeline_status->load(); } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Receiving to VStream was aborted!"); - m_entry_element->wait_for_finish(); - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("Sending to VStream was aborted!"); + return HAILO_STREAM_ABORT; } return status; } -hailo_status OutputVStreamImpl::set_nms_score_threshold(float32_t threshold) +hailo_status InputVStreamImpl::write(const hailo_pix_buffer_t &buffer) { - auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element - for (auto &elem : m_pipeline) { - auto elem_status = elem->set_nms_score_threshold(threshold); - if (HAILO_SUCCESS == elem_status) { - status = elem_status; // 1 element is enough to call this setter successful - } - } - CHECK_SUCCESS(status, "Unable to set NMS score threshold in {}", name()); - - return HAILO_SUCCESS; -} + CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer.memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!"); -hailo_status OutputVStreamImpl::set_nms_iou_threshold(float32_t threshold) -{ - auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element - for (auto &elem : m_pipeline) { - auto elem_status = elem->set_nms_iou_threshold(threshold); - if (HAILO_SUCCESS == elem_status) { - status = elem_status; // 1 element is enough to call this setter successful - } + if (nullptr != m_core_op_activated_event) { + CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "Failed to write buffer! 
Virtual stream {} is not activated!", name()); + auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); + CHECK(HAILO_TIMEOUT != status, HAILO_NETWORK_GROUP_NOT_ACTIVATED, + "Trying to write to vstream {} before its network group is activated", name()); } - CHECK_SUCCESS(status, "Unable to set NMS IoU threshold in {}", name()); - return HAILO_SUCCESS; + assert(1 == m_entry_element->sinks().size()); + auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(buffer)); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Sending to VStream was shutdown!"); + status = m_pipeline_status->load(); + } + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("Sending to VStream was aborted!"); + return HAILO_STREAM_ABORT; + } + return status; } -hailo_status OutputVStreamImpl::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) +hailo_status InputVStreamImpl::flush() { - auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element - for (auto &elem : m_pipeline) { - auto elem_status = elem->set_nms_max_proposals_per_class(max_proposals_per_class); - if (HAILO_SUCCESS == elem_status) { - status = elem_status; // 1 element is enough to call this setter successful - } + assert(1 == m_entry_element->sinks().size()); + auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(PipelineBuffer::Type::FLUSH)); + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("Sending to VStream was aborted!"); + return HAILO_STREAM_ABORT; } - CHECK_SUCCESS(status, "Unable to set NMS max proposals per class in {}", name()); + CHECK_SUCCESS(status); - // Update vstream info - m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; + status = m_entry_element->flush(); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } +bool InputVStreamImpl::is_multi_planar() const +{ + return m_is_multi_planar; +} + #ifdef HAILO_SUPPORT_MULTI_PROCESS -Expected> OutputVStreamClient::create(const VStreamIdentifier &&identifier) +Expected> InputVStreamClient::create(VStreamIdentifier &&identifier) { grpc::ChannelArguments ch_args; ch_args.SetMaxReceiveMessageSize(-1); @@ -2275,132 +837,152 @@ Expected> OutputVStreamClient::create(const auto client = make_unique_nothrow(channel); CHECK_AS_EXPECTED(client != nullptr, HAILO_OUT_OF_HOST_MEMORY); - auto user_buffer_format = client->OutputVStream_get_user_buffer_format(identifier); + auto user_buffer_format = client->InputVStream_get_user_buffer_format(identifier); CHECK_EXPECTED(user_buffer_format); - auto info = client->OutputVStream_get_info(identifier); - CHECK_EXPECTED(info); + auto vstream_info = client->InputVStream_get_info(identifier); + CHECK_EXPECTED(vstream_info); - return std::shared_ptr(new OutputVStreamClient(std::move(client), std::move(identifier), - user_buffer_format.release(), info.release())); + return std::shared_ptr(new InputVStreamClient(std::move(client), std::move(identifier), + user_buffer_format.release(), vstream_info.release())); } -OutputVStreamClient::OutputVStreamClient(std::unique_ptr client, const VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format, +InputVStreamClient::InputVStreamClient(std::unique_ptr client, VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format, hailo_vstream_info_t &&info) : m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info) {} -OutputVStreamClient::~OutputVStreamClient() +InputVStreamClient::~InputVStreamClient() { - auto reply = 
m_client->OutputVStream_release(m_identifier, OsUtils::get_curr_pid()); + auto reply = m_client->InputVStream_release(m_identifier, OsUtils::get_curr_pid()); if (reply != HAILO_SUCCESS) { - LOGGER__CRITICAL("OutputVStream_release failed!"); + LOGGER__CRITICAL("InputVStream_release failed!"); } } -hailo_status OutputVStreamClient::read(MemoryView buffer) +hailo_status InputVStreamClient::write(const MemoryView &buffer) { - return m_client->OutputVStream_read(m_identifier, buffer); + return m_client->InputVStream_write(m_identifier, buffer); } -hailo_status OutputVStreamClient::abort() +hailo_status InputVStreamClient::write(const hailo_pix_buffer_t &buffer) +{ + return m_client->InputVStream_write(m_identifier, buffer); +} + +hailo_status InputVStreamClient::flush() +{ + return m_client->InputVStream_flush(m_identifier); +} + +bool InputVStreamClient::is_multi_planar() const +{ + auto is_multi_planar_exp = m_client->InputVStream_is_multi_planar(m_identifier); + if (!is_multi_planar_exp) { + LOGGER__CRITICAL("InputVStream_is_multi_planar failed with status={}", is_multi_planar_exp.status()); + return true; + } + return is_multi_planar_exp.release(); +} + +hailo_status InputVStreamClient::abort() { auto expected_client = HailoRtRpcClientUtils::create_client(); CHECK_EXPECTED_AS_STATUS(expected_client); auto abort_client = expected_client.release(); - return abort_client->OutputVStream_abort(m_identifier); + return abort_client->InputVStream_abort(m_identifier); } -hailo_status OutputVStreamClient::resume() +hailo_status InputVStreamClient::resume() { - return m_client->OutputVStream_resume(m_identifier); + return m_client->InputVStream_resume(m_identifier); } -hailo_status OutputVStreamClient::stop_and_clear() +hailo_status InputVStreamClient::stop_and_clear() { auto expected_client = HailoRtRpcClientUtils::create_client(); CHECK_EXPECTED_AS_STATUS(expected_client); auto stop_and_clear_client = expected_client.release(); - return stop_and_clear_client->OutputVStream_stop_and_clear(m_identifier); + return stop_and_clear_client->InputVStream_stop_and_clear(m_identifier); } -hailo_status OutputVStreamClient::start_vstream() +hailo_status InputVStreamClient::start_vstream() { auto expected_client = HailoRtRpcClientUtils::create_client(); CHECK_EXPECTED_AS_STATUS(expected_client); auto start_vstream_client = expected_client.release(); - return start_vstream_client->OutputVStream_start_vstream(m_identifier); + return start_vstream_client->InputVStream_start_vstream(m_identifier); } -size_t OutputVStreamClient::get_frame_size() const +size_t InputVStreamClient::get_frame_size() const { - auto frame_size = m_client->OutputVStream_get_frame_size(m_identifier); + auto frame_size = m_client->InputVStream_get_frame_size(m_identifier); if (!frame_size) { - LOGGER__CRITICAL("OutputVStream_get_frame_size failed with status={}", frame_size.status()); + LOGGER__CRITICAL("InputVStream_get_frame_size failed with status={}", frame_size.status()); return 0; } return frame_size.release(); } -const hailo_vstream_info_t &OutputVStreamClient::get_info() const +const hailo_vstream_info_t &InputVStreamClient::get_info() const { return m_info; } -const hailo_format_t &OutputVStreamClient::get_user_buffer_format() const +const hailo_format_t &InputVStreamClient::get_user_buffer_format() const { return m_user_buffer_format; } -std::string OutputVStreamClient::name() const +std::string InputVStreamClient::name() const { - auto expected_name = m_client->OutputVStream_name(m_identifier); + auto expected_name = 
m_client->InputVStream_name(m_identifier); if (!expected_name) { - LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status()); + LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); return ""; } return expected_name.release(); } -std::string OutputVStreamClient::network_name() const +std::string InputVStreamClient::network_name() const { - auto expected_name = m_client->OutputVStream_network_name(m_identifier); + auto expected_name = m_client->InputVStream_network_name(m_identifier); if (!expected_name) { - LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status()); + LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); return ""; } return expected_name.release(); } -const std::map &OutputVStreamClient::get_fps_accumulators() const +const std::map &InputVStreamClient::get_fps_accumulators() const { - LOGGER__ERROR("OutputVStream::get_fps_accumulators function is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_fps_accumulators function is not supported when using multi-process service"); return m_fps_accumulators; } -const std::map &OutputVStreamClient::get_latency_accumulators() const +const std::map &InputVStreamClient::get_latency_accumulators() const { - LOGGER__ERROR("OutputVStream::get_latency_accumulators functoin is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_latency_accumulators function is not supported when using multi-process service"); return m_latency_accumulators; } -const std::map> &OutputVStreamClient::get_queue_size_accumulators() const +const std::map> &InputVStreamClient::get_queue_size_accumulators() const { - LOGGER__ERROR("OutputVStream::get_queue_size_accumulators function is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_queue_size_accumulators function is not supported when using multi-process service"); return m_queue_size_accumulators; } -AccumulatorPtr OutputVStreamClient::get_pipeline_latency_accumulator() const +AccumulatorPtr InputVStreamClient::get_pipeline_latency_accumulator() const { - LOGGER__ERROR("OutputVStream::get_pipeline_latency_accumulator function is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_pipeline_latency_accumulator function is not supported when using multi-process service"); return m_pipeline_latency_accumulator; } -const std::vector> &OutputVStreamClient::get_pipeline() const +const std::vector> &InputVStreamClient::get_pipeline() const { - LOGGER__ERROR("OutputVStream::get_pipeline function is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_pipeline function is not supported when using multi-process service"); return m_pipeline; } -hailo_status OutputVStreamClient::create_client() +hailo_status InputVStreamClient::create_client() { auto expected_client = HailoRtRpcClientUtils::create_client(); CHECK_EXPECTED_AS_STATUS(expected_client); @@ -2408,831 +990,448 @@ hailo_status OutputVStreamClient::create_client() return HAILO_SUCCESS; } -hailo_status OutputVStreamClient::before_fork() +hailo_status InputVStreamClient::before_fork() { m_client.reset(); return HAILO_SUCCESS; } -hailo_status OutputVStreamClient::after_fork_in_parent() +hailo_status InputVStreamClient::after_fork_in_parent() { return create_client(); } -hailo_status OutputVStreamClient::after_fork_in_child() +hailo_status InputVStreamClient::after_fork_in_child() { return 
create_client(); } -bool OutputVStreamClient::is_aborted() +bool InputVStreamClient::is_aborted() { - auto is_aborted_exp = m_client->OutputVStream_is_aborted(m_identifier); + auto is_aborted_exp = m_client->InputVStream_is_aborted(m_identifier); if (!is_aborted_exp) { - LOGGER__CRITICAL("OutputVStream_is_aborted failed with status={}", is_aborted_exp.status()); + LOGGER__CRITICAL("InputVStream_is_aborted failed with status={}", is_aborted_exp.status()); return true; } return is_aborted_exp.release(); } -hailo_status OutputVStreamClient::set_nms_score_threshold(float32_t threshold) -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto vstream_client = expected_client.release(); - - CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_score_threshold(m_identifier, threshold)); - - return HAILO_SUCCESS; -} - -hailo_status OutputVStreamClient::set_nms_iou_threshold(float32_t threshold) -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto vstream_client = expected_client.release(); - - CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_iou_threshold(m_identifier, threshold)); - - return HAILO_SUCCESS; -} - -hailo_status OutputVStreamClient::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto vstream_client = expected_client.release(); - - CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_max_proposals_per_class(m_identifier, max_proposals_per_class)); - m_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; - - return HAILO_SUCCESS; -} - #endif // HAILO_SUPPORT_MULTI_PROCESS -Expected> HwReadElement::create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, - size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - auto buffer_pool = BufferPool::create(stream->get_frame_size(), buffer_pool_size, shutdown_event, elem_flags, vstream_flags); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool for {}", name); - - // On HwReadElement the stream always owns the buffer, hence, we set the mode explicitly. 
-    auto status = stream->set_buffer_mode(StreamBufferMode::OWNING);
-    CHECK_SUCCESS_AS_EXPECTED(status);
-
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
-
-    auto hw_read_elem_ptr = make_shared_nothrow(stream, buffer_pool.release(), name, timeout,
-        duration_collector.release(), shutdown_event, std::move(pipeline_status), pipeline_direction);
-    CHECK_AS_EXPECTED(nullptr != hw_read_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
-
-    LOGGER__INFO("Created {}", hw_read_elem_ptr->name());
-
-    return hw_read_elem_ptr;
-}
-
-HwReadElement::HwReadElement(std::shared_ptr stream, BufferPoolPtr buffer_pool, const std::string &name,
-    std::chrono::milliseconds timeout, DurationCollector &&duration_collector,
-    EventPtr shutdown_event, std::shared_ptr> &&pipeline_status,
-    PipelineDirection pipeline_direction) :
-    SourceElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr),
-    m_stream(stream),
-    m_pool(buffer_pool),
-    m_timeout(timeout),
-    m_shutdown_event(shutdown_event),
-    m_activation_wait_or_shutdown(stream->get_core_op_activated_event(), shutdown_event)
-{}
-
-uint32_t HwReadElement::get_invalid_frames_count()
-{
-    return m_stream->get_invalid_frames_count();
-}
-
-std::string HwReadElement::description() const
-{
-    std::stringstream element_description;
-    element_description << "(" << this->name() << " | hw_frame_size: " << m_stream->get_info().hw_frame_size << ")";
-
-    return element_description.str();
-}
-
-hailo_status HwReadElement::execute_post_deactivate(bool should_clear_abort)
-{
-    if (should_clear_abort) {
-        auto status = execute_clear_abort();
-        CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED == status)), status,
-            "Failed to clear abort stream in {}", name());
-    }
-    return HAILO_SUCCESS;
-}
-
-hailo_status HwReadElement::execute_clear()
-{
-    return HAILO_SUCCESS;
-}
-
-hailo_status HwReadElement::execute_flush()
-{
-    return HAILO_INVALID_OPERATION;
-}
-
-hailo_status HwReadElement::execute_abort()
-{
-    return m_stream->abort_impl();
-}
-
-hailo_status HwReadElement::execute_clear_abort()
-{
-    return m_stream->clear_abort_impl();
-}
-
-hailo_status HwReadElement::execute_wait_for_finish()
-{
-    return HAILO_SUCCESS;
-}
-
-std::vector HwReadElement::get_queue_size_accumulators()
+std::string InputVStreamInternal::get_pipeline_description() const
 {
-    if (nullptr == m_pool->get_queue_size_accumulator()) {
-        return std::vector();
+    std::stringstream pipeline_str;
+    pipeline_str << "Input pipeline '" << name() << "': ";
+    for (const auto &element : m_pipeline) {
+        pipeline_str << element->description() << " >> ";
     }
-    return {m_pool->get_queue_size_accumulator()};
-}
-
-void HwReadElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
-{
-    LOGGER__ERROR("run_push_async is not supported for {}", name());
-    assert(false);
+    pipeline_str << "HW";
+    return pipeline_str.str();
 }
 
-hailo_status HwReadElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
+Expected<std::shared_ptr<OutputVStreamInternal>> OutputVStreamInternal::create(const hailo_vstream_info_t &vstream_info,
+    const std::vector<hailo_quant_info_t> &quant_infos, const hailo_vstream_params_t &vstream_params,
+    std::shared_ptr<PipelineElement> pipeline_entry, std::vector<std::shared_ptr<PipelineElement>> &&pipeline,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator)
 {
-    return HAILO_INVALID_OPERATION;
+    auto vstream = OutputVStreamImpl::create(vstream_info, quant_infos, vstream_params, pipeline_entry,
+        std::move(pipeline), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator);
+    CHECK_EXPECTED(vstream);
+    auto vstream_ptr = std::shared_ptr<OutputVStreamInternal>(vstream.release());
+    return vstream_ptr;
 }
 
-Expected HwReadElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/)
+OutputVStreamInternal::OutputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector<hailo_quant_info_t> &quant_infos,
+    const hailo_vstream_params_t &vstream_params, std::shared_ptr<PipelineElement> pipeline_entry,
+    std::vector<std::shared_ptr<PipelineElement>> &&pipeline, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status) :
+    BaseVStream(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status),
+        pipeline_latency_accumulator, std::move(core_op_activated_event), output_status)
 {
-    auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout);
-    if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
-        return make_unexpected(buffer.status());
-    }
-    CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status());
-
-    while (true) {
-        if (!m_stream->is_scheduled()) {
-            auto status = m_activation_wait_or_shutdown.wait(m_timeout);
-            if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
-                return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
-            }
-            if (HAILO_TIMEOUT == status) {
-                return make_unexpected(HAILO_NETWORK_GROUP_NOT_ACTIVATED);
-            }
-            CHECK_SUCCESS_AS_EXPECTED(status);
-        } else {
-            auto status = m_activation_wait_or_shutdown.wait(std::chrono::milliseconds(0));
-            if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
-                return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
-            }
-        }
-
-        MemoryView buffer_view(buffer.value().as_view());
-        m_duration_collector.start_measurement();
-        auto status = m_stream->read(buffer_view);
-        if (HAILO_INVALID_FRAME == status) {
-            m_stream->increase_invalid_frames_count(1);
-            status = HAILO_SUCCESS;
-        }
-        if (HAILO_STREAM_NOT_ACTIVATED == status) {
-            // Try again
-            continue;
-        }
-        if (HAILO_STREAM_ABORTED_BY_USER == status) {
-            LOGGER__INFO("Reading from stream was aborted!");
-            return make_unexpected(HAILO_STREAM_ABORTED_BY_USER);
-        }
-        CHECK_SUCCESS_AS_EXPECTED(status, "{} (D2H) failed with status={}", name(), status);
-        m_duration_collector.complete_measurement();
-
-        return buffer.release();
-    }
+    // Reverse the order of the pipeline elements so that destruction runs in the right order (from user side to HW side)
+    std::reverse(m_pipeline.begin(), m_pipeline.end());
 }
 
-hailo_status HwReadElement::execute_activate()
+Expected<std::shared_ptr<OutputVStreamImpl>> OutputVStreamImpl::create(const hailo_vstream_info_t &vstream_info,
+    const std::vector<hailo_quant_info_t> &quant_infos, const hailo_vstream_params_t &vstream_params,
+    std::shared_ptr<PipelineElement> pipeline_entry, std::vector<std::shared_ptr<PipelineElement>> &&pipeline,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator)
 {
-    return HAILO_SUCCESS;
-}
+    hailo_status status = HAILO_UNINITIALIZED;
 
-hailo_status HwReadElement::execute_deactivate()
-{
-    auto signal_shutdown_status = m_shutdown_event->signal();
-    if (HAILO_SUCCESS != signal_shutdown_status) {
-        LOGGER__ERROR("Signaling {} shutdown event failed with {}", name(), signal_shutdown_status);
-    }
+    CHECK_AS_EXPECTED(1 == pipeline_entry->sources().size(), HAILO_INVALID_ARGUMENT,
+        "OutputVStream's entry element is expected to have one source");
 
-    auto abort_status = execute_abort();
-    if ((HAILO_SUCCESS != abort_status) && (HAILO_STREAM_NOT_ACTIVATED != abort_status)) {
-        LOGGER__ERROR("Abort {} failed with {}", name(), abort_status);
-        return abort_status;
+    if (nullptr != pipeline_latency_accumulator) {
+        pipeline_entry->sources()[0].set_pull_complete_callback([pipeline_latency_accumulator](const PipelineBuffer::Metadata& metadata) {
+            const auto duration_sec = std::chrono::duration_cast<std::chrono::duration<double>>(
+                std::chrono::steady_clock::now() - metadata.get_start_time()).count();
+            pipeline_latency_accumulator->add_data_point(duration_sec);
+        });
     }
 
-    return signal_shutdown_status;
+    auto vstream_ptr = std::shared_ptr<OutputVStreamImpl>(new OutputVStreamImpl(vstream_info, quant_infos, vstream_params, std::move(pipeline_entry),
+        std::move(pipeline), std::move(pipeline_status), pipeline_latency_accumulator, std::move(core_op_activated_event), status));
+    CHECK_SUCCESS_AS_EXPECTED(status, "Failed to create virtual stream");
+
+    return vstream_ptr;
 }
 
-Expected> HwWriteElement::create(std::shared_ptr stream, const std::string &name,
-    hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status,
-    PipelineDirection pipeline_direction)
+std::string OutputVStreamInternal::get_pipeline_description() const
 {
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
-
-    auto got_flush_event = Event::create_shared(Event::State::not_signalled);
-    CHECK_EXPECTED(got_flush_event);
+    // Elements are saved in reverse order (for destruction), so we reverse again before printing.
+    std::vector<std::shared_ptr<PipelineElement>> reversed_pipeline;
+    std::reverse_copy(m_pipeline.begin(), m_pipeline.end(), std::back_inserter(reversed_pipeline));
 
-    // On HwWriteElement the stream always owns the buffer, hence, we set the mode explicitly.
-    auto status = stream->set_buffer_mode(StreamBufferMode::OWNING);
-    CHECK_SUCCESS_AS_EXPECTED(status);
-
-    auto hw_write_elem_ptr = make_shared_nothrow(stream, name,
-        duration_collector.release(), std::move(pipeline_status), got_flush_event.release(), pipeline_direction);
-    CHECK_AS_EXPECTED(nullptr != hw_write_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+    std::stringstream pipeline_str;
+    pipeline_str << "Output pipeline '" << name() << "': HW";
+    for (const auto &element : reversed_pipeline) {
+        pipeline_str << " >> " << element->description();
+    }
+    return pipeline_str.str();
+}
 
-    LOGGER__INFO("Created {}", hw_write_elem_ptr->name());
+OutputVStreamImpl::OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector<hailo_quant_info_t> &quant_infos,
+    const hailo_vstream_params_t &vstream_params, std::shared_ptr<PipelineElement> pipeline_entry, std::vector<std::shared_ptr<PipelineElement>> &&pipeline,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator,
+    EventPtr core_op_activated_event, hailo_status &output_status) :
+    OutputVStreamInternal(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status),
+        pipeline_latency_accumulator, std::move(core_op_activated_event), output_status)
+{
+    if (HAILO_SUCCESS != output_status) {
+        return;
+    }
 
-    return hw_write_elem_ptr;
+    LOGGER__INFO("Creating {}...", name());
 }
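The set_pull_complete_callback hook above is the entire pipeline-latency mechanism: the entry pad stamps a start time into the buffer metadata when a frame enters the pipeline, and the callback converts the elapsed steady-clock time into seconds and feeds the accumulator. The same pattern in isolation, as a minimal self-contained sketch (LatencyAccumulator is a stand-in type, not a HailoRT class):

    #include <chrono>
    #include <functional>
    #include <vector>

    // Stand-in for HailoRT's AccumulatorPtr; it only collects latency samples.
    struct LatencyAccumulator {
        std::vector<double> samples;
        void add_data_point(double seconds) { samples.push_back(seconds); }
    };

    int main()
    {
        LatencyAccumulator accumulator;
        // The entry element records the start time when the frame enters the pipeline...
        const auto start_time = std::chrono::steady_clock::now();
        // ...and the pull-complete callback measures the elapsed time once the user pulls the result.
        std::function<void()> pull_complete = [&accumulator, start_time]() {
            const auto duration_sec = std::chrono::duration_cast<std::chrono::duration<double>>(
                std::chrono::steady_clock::now() - start_time).count();
            accumulator.add_data_point(duration_sec);
        };
        pull_complete();
        return 0;
    }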
-HwWriteElement::HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector,
-    std::shared_ptr> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction) :
-    SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr),
-    m_stream(stream), m_got_flush_event(got_flush_event)
-{}
-
-Expected HwWriteElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+OutputVStreamImpl::~OutputVStreamImpl()
 {
-    return make_unexpected(HAILO_INVALID_OPERATION);
+    (void)stop_vstream();
 }
 
-hailo_status HwWriteElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
+hailo_status OutputVStreamImpl::read(MemoryView buffer)
 {
-    if (PipelineBuffer::Type::FLUSH == buffer.get_type()) {
-        hailo_status flush_status = m_stream->flush();
-        if (HAILO_STREAM_ABORTED_BY_USER == flush_status) {
-            LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
-        } else if (HAILO_SUCCESS != flush_status) {
-            LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
+    if (nullptr != m_core_op_activated_event) {
+        CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "read() failed! Virtual stream {} is not activated!", name());
+        auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0));
+        if (HAILO_TIMEOUT == status) {
+            LOGGER__INFO("Trying to read from vstream {} before its network_group is activated", name());
+            return HAILO_NETWORK_GROUP_NOT_ACTIVATED;
         }
-        hailo_status status = m_got_flush_event->signal();
         CHECK_SUCCESS(status);
-        return HAILO_SUCCESS;
     }
 
-    m_duration_collector.start_measurement();
-    const auto status = m_stream->write(MemoryView(buffer.data(), buffer.size()));
-    m_duration_collector.complete_measurement();
-
-    if (HAILO_STREAM_ABORTED_BY_USER == status) {
-        LOGGER__INFO("Failed to send on input stream {} because stream was aborted", m_stream->to_string());
-        return HAILO_STREAM_ABORTED_BY_USER;
+    assert(1 == m_entry_element->sources().size());
+    auto recv_buffer = m_entry_element->sources()[0].run_pull(PipelineBuffer(buffer, [](hailo_status){}, HAILO_SUCCESS, false, nullptr, m_measure_pipeline_latency));
+    auto status = recv_buffer.status();
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+        LOGGER__INFO("Receiving from VStream was shut down!");
+        status = m_pipeline_status->load();
     }
-    CHECK_SUCCESS(status, "{} (H2D) failed with status={}", name(), status);
-
-    return HAILO_SUCCESS;
-}
-
-void HwWriteElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
-{
-    LOGGER__ERROR("run_push_async is not supported for {}", name());
-    assert(false);
-}
-
-hailo_status HwWriteElement::execute_activate()
-{
-    return HAILO_SUCCESS;
+
+    return status;
 }
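For context, read() above surfaces three outcomes to the caller: success, HAILO_NETWORK_GROUP_NOT_ACTIVATED when the zero-timeout activation wait times out, and the stored pipeline status after a shutdown. A hedged caller-side sketch (the read_one_frame helper is illustrative and not part of this change; the header path is assumed):

    #include <cstdint>
    #include <vector>
    #include "hailo/hailort.hpp" // assumed public HailoRT header

    // Illustrative helper: pull one frame and distinguish the statuses read() can return.
    hailo_status read_one_frame(hailort::OutputVStream &output_vstream)
    {
        std::vector<uint8_t> buffer(output_vstream.get_frame_size());
        auto status = output_vstream.read(hailort::MemoryView(buffer.data(), buffer.size()));
        if (HAILO_NETWORK_GROUP_NOT_ACTIVATED == status) {
            // The network group is not activated yet; activate it (or retry) before reading.
            return status;
        }
        // HAILO_SUCCESS, or the pipeline status that was stored when the pipeline shut down.
        return status;
    }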
-hailo_status HwWriteElement::execute_deactivate()
+hailo_status OutputVStreamImpl::set_nms_score_threshold(float32_t threshold)
 {
-    // The flush operation will block until all buffers currently in the pipeline will be processed.
-    // We assume that no buffers are sent after the call for deactivate.
-    hailo_status flush_status = m_stream->flush();
-    if (HAILO_STREAM_ABORTED_BY_USER == flush_status) {
-        LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
-        return HAILO_SUCCESS;
-    } else if (HAILO_STREAM_NOT_ACTIVATED == flush_status) {
-        LOGGER__INFO("Failed flushing input stream {} because stream is not activated", m_stream->to_string());
-        return HAILO_SUCCESS;
-    } else if (HAILO_SUCCESS != flush_status) {
-        LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
+    auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element
+    for (auto &elem : m_pipeline) {
+        auto elem_status = elem->set_nms_score_threshold(threshold);
+        if (HAILO_SUCCESS == elem_status) {
+            status = elem_status; // One successful element is enough to consider the setter successful
+        }
     }
+    CHECK_SUCCESS(status, "Unable to set NMS score threshold in {}", name());
 
-    auto abort_status = execute_abort();
-    CHECK(((abort_status == HAILO_SUCCESS) || (abort_status == HAILO_STREAM_NOT_ACTIVATED)), abort_status,
-        "Failed to abort stream in {}", name());
     return HAILO_SUCCESS;
 }
 
-hailo_status HwWriteElement::execute_post_deactivate(bool should_clear_abort)
+hailo_status OutputVStreamImpl::set_nms_iou_threshold(float32_t threshold)
 {
-    if (should_clear_abort) {
-        auto status = execute_clear_abort();
-        CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
-            "Failed to clear abort stream in {}", name());
+    auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element
+    for (auto &elem : m_pipeline) {
+        auto elem_status = elem->set_nms_iou_threshold(threshold);
+        if (HAILO_SUCCESS == elem_status) {
+            status = elem_status; // One successful element is enough to consider the setter successful
+        }
     }
+    CHECK_SUCCESS(status, "Unable to set NMS IoU threshold in {}", name());
 
-hailo_status HwWriteElement::execute_clear()
-{
     return HAILO_SUCCESS;
 }
 
-hailo_status HwWriteElement::execute_flush()
+hailo_status OutputVStreamImpl::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class)
 {
-    hailo_status status = m_got_flush_event->wait(m_stream->get_timeout());
-    CHECK_SUCCESS(status);
-
-    status = m_got_flush_event->reset();
-    CHECK_SUCCESS(status);
-
-    return HAILO_SUCCESS;
-}
+    auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element
+    std::shared_ptr<UserBufferQueueElement> user_buffer_queue_element = nullptr;
+    for (auto &elem : m_pipeline) {
+        if (nullptr != std::dynamic_pointer_cast<UserBufferQueueElement>(elem)) {
+            user_buffer_queue_element = std::dynamic_pointer_cast<UserBufferQueueElement>(elem);
+        }
 
-hailo_status HwWriteElement::execute_abort()
-{
-    return m_stream->abort_impl();
-}
+        auto elem_status = elem->set_nms_max_proposals_per_class(max_proposals_per_class);
+        if (HAILO_SUCCESS == elem_status) {
+            status = elem_status; // One successful element is enough to consider the setter successful
 
-hailo_status HwWriteElement::execute_clear_abort()
-{
-    return m_stream->clear_abort_impl();
-}
+            // Update vstream info and frame size
+            m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class;
+            auto set_buffer_size_status = user_buffer_queue_element->set_buffer_pool_buffer_size(HailoRTCommon::get_frame_size(m_vstream_info,
+                m_vstream_params.user_buffer_format));
+            CHECK_SUCCESS(set_buffer_size_status, "Failed to update buffer size in {}", name());
+        }
+    }
+    CHECK_SUCCESS(status, "Unable to set NMS max proposals per class in {}", name());
 
-hailo_status HwWriteElement::execute_wait_for_finish()
-{
     return HAILO_SUCCESS;
 }
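The setters above share one pattern: start from HAILO_INVALID_OPERATION, offer the new value to every pipeline element, and let a single acceptance mark the whole call successful (elements that do not implement the setter simply decline). The pattern in isolation, with stand-in types rather than HailoRT classes:

    #include <memory>
    #include <vector>

    struct Element {
        virtual ~Element() = default;
        virtual bool set_score_threshold(float) { return false; } // default: not supported
    };

    struct NmsPostProcessElement : Element {
        float threshold = 0.0f;
        bool set_score_threshold(float t) override { threshold = t; return true; }
    };

    // Returns true if at least one element accepted the new threshold.
    bool broadcast_score_threshold(std::vector<std::shared_ptr<Element>> &pipeline, float t)
    {
        bool any_accepted = false;
        for (auto &elem : pipeline) {
            any_accepted |= elem->set_score_threshold(t); // keep iterating: several elements may care
        }
        return any_accepted;
    }

    int main()
    {
        std::vector<std::shared_ptr<Element>> pipeline = {
            std::make_shared<Element>(), std::make_shared<NmsPostProcessElement>()
        };
        return broadcast_score_threshold(pipeline, 0.3f) ? 0 : 1;
    }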
-std::string HwWriteElement::description() const
-{
-    std::stringstream element_description;
-    element_description << "(" << this->name() << " | hw_frame_size: " << m_stream->get_info().hw_frame_size << ")";
-
-    return element_description.str();
-}
-
-Expected> LastAsyncElement::create(const std::string &name,
-    hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status,
-    std::shared_ptr async_pipeline, PipelineDirection pipeline_direction)
+hailo_status OutputVStreamImpl::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size)
 {
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
+    auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element
+    std::shared_ptr<UserBufferQueueElement> user_buffer_queue_element = nullptr;
+    for (auto &elem : m_pipeline) {
+        if (nullptr != std::dynamic_pointer_cast<UserBufferQueueElement>(elem)) {
+            user_buffer_queue_element = std::dynamic_pointer_cast<UserBufferQueueElement>(elem);
+        }
 
-    auto last_async_elem_ptr = make_shared_nothrow(name,
-        duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline);
-    CHECK_NOT_NULL_AS_EXPECTED(last_async_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+        auto elem_status = elem->set_nms_max_accumulated_mask_size(max_accumulated_mask_size);
+        if (HAILO_SUCCESS == elem_status) {
+            status = elem_status; // One successful element is enough to consider the setter successful
 
-    LOGGER__INFO("Created {}", last_async_elem_ptr->name());
+            // Update vstream info and frame size
+            m_vstream_info.nms_shape.max_accumulated_mask_size = max_accumulated_mask_size;
+            auto set_buffer_size_status = user_buffer_queue_element->set_buffer_pool_buffer_size(HailoRTCommon::get_frame_size(m_vstream_info,
+                m_vstream_params.user_buffer_format));
+            CHECK_SUCCESS(set_buffer_size_status, "Failed to update buffer size in {}", name());
+        }
+    }
+    CHECK_SUCCESS(status, "Unable to set NMS max accumulated mask size in {}", name());
 
-    return last_async_elem_ptr;
-}
-
-Expected> LastAsyncElement::create(const std::string &name,
-    const ElementBuildParams &build_params, std::shared_ptr async_pipeline, PipelineDirection pipeline_direction)
-{
-    return LastAsyncElement::create(name, build_params.elem_stats_flags,
-        build_params.pipeline_status, async_pipeline, pipeline_direction);
+    return HAILO_SUCCESS;
 }
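The buffer-pool resize above exists because max_bboxes_per_class (and likewise max_accumulated_mask_size) feeds directly into the host-side frame size, so user buffers allocated before the change would no longer fit a frame. A rough sketch of that dependency for a per-class NMS layout (the authoritative layout lives in HailoRTCommon::get_frame_size; the formula below is an assumption for illustration only):

    #include <cstdio>
    #include <cstddef>
    #include <cstdint>

    struct NmsShape {
        uint32_t number_of_classes;
        uint32_t max_bboxes_per_class;
    };

    // Assumed per-class layout: a float32 bbox count followed by fixed-size boxes.
    static size_t nms_host_frame_size(const NmsShape &shape, size_t bbox_size)
    {
        const size_t per_class = sizeof(float) + (shape.max_bboxes_per_class * bbox_size);
        return shape.number_of_classes * per_class;
    }

    int main()
    {
        NmsShape shape{80, 100};
        // Doubling max_bboxes_per_class roughly doubles the frame each user buffer must hold.
        std::printf("before: %zu bytes\n", nms_host_frame_size(shape, 5 * sizeof(float)));
        shape.max_bboxes_per_class = 200;
        std::printf("after:  %zu bytes\n", nms_host_frame_size(shape, 5 * sizeof(float)));
        return 0;
    }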
-LastAsyncElement::LastAsyncElement(const std::string &name, DurationCollector &&duration_collector,
-    std::shared_ptr> &&pipeline_status,
-    PipelineDirection pipeline_direction, std::shared_ptr async_pipeline):
-    SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline)
-{}
-
-Expected LastAsyncElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+#ifdef HAILO_SUPPORT_MULTI_PROCESS
+Expected<std::shared_ptr<OutputVStreamClient>> OutputVStreamClient::create(const VStreamIdentifier &&identifier)
 {
-    return make_unexpected(HAILO_INVALID_OPERATION);
-}
+    grpc::ChannelArguments ch_args;
+    ch_args.SetMaxReceiveMessageSize(-1);
+    auto channel = grpc::CreateCustomChannel(hailort::HAILORT_SERVICE_ADDRESS, grpc::InsecureChannelCredentials(), ch_args);
+    CHECK_AS_EXPECTED(channel != nullptr, HAILO_INTERNAL_FAILURE);
 
-hailo_status LastAsyncElement::run_push(PipelineBuffer &&/*optional*/, const PipelinePad &/*sink*/)
-{
-    return HAILO_INVALID_OPERATION;
-}
+    auto client = make_unique_nothrow<HailoRtRpcClient>(channel);
+    CHECK_AS_EXPECTED(client != nullptr, HAILO_OUT_OF_HOST_MEMORY);
 
-void LastAsyncElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
-{
-    auto exec_done_cb = buffer.get_exec_done_cb();
-    exec_done_cb(buffer.action_status());
-}
+    auto user_buffer_format = client->OutputVStream_get_user_buffer_format(identifier);
+    CHECK_EXPECTED(user_buffer_format);
 
-std::string LastAsyncElement::description() const
-{
-    std::stringstream element_description;
-    element_description << "(" << this->name() << ")";
+    auto info = client->OutputVStream_get_info(identifier);
+    CHECK_EXPECTED(info);
 
-    return element_description.str();
+    return std::shared_ptr<OutputVStreamClient>(new OutputVStreamClient(std::move(client), std::move(identifier),
+        user_buffer_format.release(), info.release()));
 }
 
-hailo_status LastAsyncElement::execute_activate()
-{
-    return HAILO_SUCCESS;
-}
+OutputVStreamClient::OutputVStreamClient(std::unique_ptr<HailoRtRpcClient> client, const VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format,
+    hailo_vstream_info_t &&info) :
+    m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info) {}
 
-hailo_status LastAsyncElement::execute_wait_for_finish()
+OutputVStreamClient::~OutputVStreamClient()
 {
-    return HAILO_SUCCESS;
+    auto reply = m_client->OutputVStream_release(m_identifier, OsUtils::get_curr_pid());
+    if (reply != HAILO_SUCCESS) {
+        LOGGER__CRITICAL("OutputVStream_release failed!");
+    }
 }
 
-hailo_status LastAsyncElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name)
+hailo_status OutputVStreamClient::read(MemoryView buffer)
 {
-    (void)source_name;
-    return m_sinks[0].prev()->element().enqueue_execution_buffer(mem_view, exec_done, m_sinks[0].prev()->name());
+    return m_client->OutputVStream_read(m_identifier, buffer);
 }
 
-Expected LastAsyncElement::can_push_buffer_upstream(const uint32_t /*source_index*/)
+hailo_status OutputVStreamClient::abort()
 {
-    auto source_index = m_sinks[0].prev()->element().get_source_index_from_source_name(m_sinks[0].prev()->name());
-    CHECK_EXPECTED(source_index);
-    return m_sinks[0].prev()->element().can_push_buffer_upstream(*source_index);
+    auto expected_client = HailoRtRpcClientUtils::create_client();
+    CHECK_EXPECTED_AS_STATUS(expected_client);
+    auto abort_client = expected_client.release();
+    return abort_client->OutputVStream_abort(m_identifier);
 }
 
-hailo_status LastAsyncElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/)
+hailo_status OutputVStreamClient::resume()
 {
-    auto source_index = m_sinks[0].prev()->element().get_source_index_from_source_name(m_sinks[0].prev()->name());
-    CHECK_EXPECTED_AS_STATUS(source_index);
-    return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, *source_index);
+    return m_client->OutputVStream_resume(m_identifier);
 }
 
-Expected LastAsyncElement::can_push_buffer_upstream(const std::string &/*source_name*/)
+hailo_status OutputVStreamClient::stop_and_clear()
 {
-    return m_sinks[0].prev()->element().can_push_buffer_upstream(m_sinks[0].prev()->name());
-}
+    auto expected_client = HailoRtRpcClientUtils::create_client();
+    CHECK_EXPECTED_AS_STATUS(expected_client);
+    auto stop_and_clear_client = expected_client.release();
 
-hailo_status LastAsyncElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &/*source_name*/)
-{
-    return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, m_sinks[0].prev()->name());
+    return stop_and_clear_client->OutputVStream_stop_and_clear(m_identifier);
 }
 
-Expected> AsyncHwElement::create(const std::unordered_map
&named_stream_infos, - std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, const std::string &name, - std::shared_ptr> pipeline_status, std::shared_ptr net_group, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +hailo_status OutputVStreamClient::start_vstream() { - std::vector output_streams_pools; - for (const auto &stream_info_pair : named_stream_infos) { - if (HAILO_D2H_STREAM == stream_info_pair.second.direction) { - auto buffer_pool = BufferPool::create(stream_info_pair.second.hw_frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, - is_last_copy_element); - CHECK_EXPECTED(buffer_pool); - output_streams_pools.emplace_back(buffer_pool.release()); - } - } - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto min_buffer_pool_size = net_group->get_min_buffer_pool_size(); - CHECK_EXPECTED(min_buffer_pool_size); - - auto elem_ptr = make_shared_nothrow(named_stream_infos, timeout, std::move(output_streams_pools), name, - duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, net_group, - min_buffer_pool_size.release()); - CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", elem_ptr->name()); + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto start_vstream_client = expected_client.release(); - return elem_ptr; + return start_vstream_client->OutputVStream_start_vstream(m_identifier); } -AsyncHwElement::AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, - std::vector &&output_streams_pools, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline, std::shared_ptr net_group, - const size_t max_ongoing_transfers) : - PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_timeout(timeout), - m_pools(std::move(output_streams_pools)), - m_net_group(net_group), - m_max_ongoing_transfers(max_ongoing_transfers) +size_t OutputVStreamClient::get_frame_size() const { - uint32_t sinks_count = 0; - uint32_t sources_count = 0; - for (const auto &stream_info_pair : named_stream_infos) { - if (HAILO_D2H_STREAM == stream_info_pair.second.direction) { - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); - const auto &source_name = m_sources[sources_count++].name(); - m_source_name_to_stream_name[source_name] = stream_info_pair.first; - - m_source_name_to_index[source_name] = static_cast(m_sources.size() - 1); - } else { - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - const auto &sink_name = m_sinks[sinks_count++].name(); - m_sink_name_to_stream_name[sink_name] = stream_info_pair.first; - m_sink_name_to_index[sink_name] = static_cast(m_sinks.size() - 1); - m_sink_has_arrived[sink_name] = false; - } + auto frame_size = m_client->OutputVStream_get_frame_size(m_identifier); + if (!frame_size) { + LOGGER__CRITICAL("OutputVStream_get_frame_size failed with status={}", frame_size.status()); + return 0; } + return frame_size.release(); } -bool AsyncHwElement::has_all_sinks_arrived() +const hailo_vstream_info_t &OutputVStreamClient::get_info() const { - for 
(const auto &current_sink : m_sink_has_arrived) {
-        if (!current_sink.second) {
-            return false;
-        }
-    }
-    return true;
+    return m_info;
 }
 
-// This func overides the regular dataflow of this element and calls all next elements run_push_async directly
-// (normally, the run_push_async of the next elements will be called by the LL async read_done)
-void AsyncHwElement::handle_error_in_hw_async_elem(hailo_status error_status)
+const hailo_format_t &OutputVStreamClient::get_user_buffer_format() const
 {
-    for (auto &name_output_stream_pair : m_source_name_to_index) {
-        auto source_index = name_output_stream_pair.second;
-        assert(source_index < m_pools.size());
-        assert(source_index < m_sources.size());
-        auto expected_buffer = m_pools[source_index]->acquire_buffer_ptr(m_timeout);
-        if (HAILO_SUCCESS == expected_buffer.status()) {
-            expected_buffer->get()->set_action_status(error_status);
-            m_sources[source_index].next()->run_push_async(std::move(*expected_buffer.value()));
-        } else {
-            m_sources[source_index].next()->run_push_async(PipelineBuffer(error_status));
-        }
-    }
-
-    for (const auto &sink : m_sinks) {
-        m_sink_has_arrived[sink.name()] = false;
-    }
-    m_input_buffers.clear();
-
-    return;
+    return m_user_buffer_format;
 }
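get_frame_size(), get_info(), get_user_buffer_format() and name() on the client all follow the same fail-safe convention: their signatures cannot report an RPC failure, so they log at CRITICAL level and return a harmless default (0, a cached struct, or an empty string). Reduced to its skeleton, with a toy Expected substitute (none of the code below is HailoRT API):

    #include <cstdio>
    #include <cstddef>

    // Toy substitute for HailoRT's Expected<T>: a value plus a status code.
    template <typename T>
    struct Expected {
        T value;
        int status; // 0 == success
        explicit operator bool() const { return 0 == status; }
        T release() { return value; }
    };

    // Simulated RPC call that fails.
    static Expected<size_t> rpc_get_frame_size() { return {0, 1}; }

    // Accessors that cannot return a status log the failure and fall back to a safe default.
    static size_t get_frame_size()
    {
        auto frame_size = rpc_get_frame_size();
        if (!frame_size) {
            std::fprintf(stderr, "rpc_get_frame_size failed with status=%d\n", frame_size.status);
            return 0; // callers must tolerate the default
        }
        return frame_size.release();
    }

    int main()
    {
        std::printf("frame size: %zu\n", get_frame_size());
        return 0;
    }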
-void AsyncHwElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink)
+std::string OutputVStreamClient::name() const
 {
-    assert(contains(m_sink_name_to_stream_name, sink.name()));
-
-    std::unique_lock lock(m_mutex);
-    m_sink_has_arrived[sink.name()] = true;
-    m_input_buffers[sink.name()] = std::move(buffer);
-
-    if (has_all_sinks_arrived()) {
-        hailo_status all_buffers_status = HAILO_SUCCESS;
-        for (auto &input_buffer : m_input_buffers) {
-            if (HAILO_SUCCESS != input_buffer.second.action_status()) {
-                all_buffers_status = input_buffer.second.action_status();
-                break; // error from one buffer is enough
-            }
-        }
-
-        if (HAILO_SUCCESS != all_buffers_status) {
-            handle_error_in_hw_async_elem(all_buffers_status);
-            // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again
-            lock.unlock();
-            m_cv.notify_all();
-        } else {
-            std::unordered_map> source_name_to_output_buffer;
-            for (auto &name_to_index_pair : m_source_name_to_index) {
-                auto expected_buffer = m_pools[name_to_index_pair.second]->acquire_buffer_ptr(m_timeout);
-                if (HAILO_SUCCESS != expected_buffer.status()) {
-                    handle_non_recoverable_async_error(expected_buffer.status());
-                    m_input_buffers.clear();
-                    // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again
-                    lock.unlock();
-                    m_cv.notify_all();
-                    return;
-                }
-                source_name_to_output_buffer[name_to_index_pair.first] = expected_buffer.release();
-            }
-
-            NamedBuffersCallbacks named_buffers_callbacks;
-
-            for (auto &input_buffer : m_input_buffers) {
-                const auto &stream_name = m_sink_name_to_stream_name.at(input_buffer.first);
-                named_buffers_callbacks.emplace(stream_name, std::make_pair(input_buffer.second.as_view(), input_buffer.second.get_exec_done_cb()));
-            }
-
-            for (auto &output_buffer : source_name_to_output_buffer) {
-                const auto &stream_name = m_source_name_to_stream_name.at(output_buffer.first);
-                named_buffers_callbacks.emplace(stream_name, std::make_pair(output_buffer.second->as_view(),
-                    [this, buffer = output_buffer.second, source_name = output_buffer.first](hailo_status status){
-                        buffer->set_action_status(status);
-                        if (HAILO_SUCCESS == m_pipeline_status->load()) {
-                            assert(contains(m_source_name_to_index, source_name));
-                            // If pipeline_status is not success, someone already handled this error and no reason for this buffer to be pushed
-                            assert(contains(m_source_name_to_index, source_name));
-                            m_sources[m_source_name_to_index[source_name]].next()->run_push_async(std::move(*buffer));
-                        }
-                    }));
-            }
-
-            auto done_cb = [](hailo_status){};
-            auto status = m_net_group->wait_for_callbacks_to_maintain_below_threshold(m_max_ongoing_transfers);
-            if (HAILO_SUCCESS != status ) {
-                handle_non_recoverable_async_error(status);
-            }
-
-            status = m_net_group->infer_async(named_buffers_callbacks, done_cb);
-            if (HAILO_SUCCESS != status ) {
-                handle_non_recoverable_async_error(status);
-            }
-
-            for (const auto &curr_sink : m_sinks) {
-                m_sink_has_arrived[curr_sink.name()] = false;
-            }
-            m_input_buffers.clear();
-
-            // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again
-            lock.unlock();
-            m_cv.notify_all();
-        }
-    } else {
-        bool done = m_cv.wait_for(lock, m_timeout, [&](){
-            if (m_pipeline_status->load() != HAILO_SUCCESS) {
-                return true; // so we can exit this flow
-            }
-            return !m_sink_has_arrived[sink.name()];
-        });
-
-        if (!done) {
-            LOGGER__ERROR("Waiting for other threads in AsyncHwElement {} has reached a timeout (timeout={}ms)", name(), m_timeout.count());
-            handle_non_recoverable_async_error(HAILO_TIMEOUT);
-        }
-
-        if (m_pipeline_status->load() == HAILO_STREAM_ABORTED_BY_USER) {
-            lock.unlock();
-            m_cv.notify_all();
-        }
+    auto expected_name = m_client->OutputVStream_name(m_identifier);
+    if (!expected_name) {
+        LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status());
+        return "";
     }
+    return expected_name.release();
 }
 
-hailo_status AsyncHwElement::run_push(PipelineBuffer &&/*optional*/, const PipelinePad &/*sink*/)
-{
-    return HAILO_INVALID_OPERATION;
-}
-
-hailo_status AsyncHwElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name)
+std::string OutputVStreamClient::network_name() const
 {
-    CHECK(contains(m_source_name_to_index, source_name), HAILO_INTERNAL_FAILURE);
-    auto source_index = m_source_name_to_index[source_name];
-
-    auto status = m_pools[source_index]->enqueue_buffer(mem_view, exec_done);
-    CHECK_SUCCESS(status);
-
-    return HAILO_SUCCESS;
+    auto expected_name = m_client->OutputVStream_network_name(m_identifier);
+    if (!expected_name) {
+        LOGGER__CRITICAL("OutputVStream_network_name failed with status={}", expected_name.status());
+        return "";
+    }
+    return expected_name.release();
 }
 
-hailo_status AsyncHwElement::execute_dequeue_user_buffers(hailo_status error_status)
+const std::map<std::string, AccumulatorPtr> &OutputVStreamClient::get_fps_accumulators() const
 {
-    for (auto pool : m_pools) {
-        auto status = empty_buffer_pool(pool, error_status, m_timeout);
-        CHECK_SUCCESS(status);
-    }
-    return PipelineElement::execute_dequeue_user_buffers(error_status);
+    LOGGER__ERROR("OutputVStream::get_fps_accumulators function is not supported when using multi-process service");
+    return m_fps_accumulators;
 }
 
-Expected AsyncHwElement::can_push_buffer_upstream(const uint32_t source_index)
+const std::map<std::string, AccumulatorPtr> &OutputVStreamClient::get_latency_accumulators() const
 {
-    CHECK_AS_EXPECTED(source_index < m_pools.size(), HAILO_NOT_FOUND);
-    return !m_pools[source_index]->is_full();
+    LOGGER__ERROR("OutputVStream::get_latency_accumulators function is not supported when using multi-process service");
+    return m_latency_accumulators;
 }
 
-hailo_status AsyncHwElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index)
+const std::map<std::string, std::vector<AccumulatorPtr>> &OutputVStreamClient::get_queue_size_accumulators() const
 {
-    CHECK(source_index < m_pools.size(), HAILO_NOT_FOUND);
-    CHECK_SUCCESS(m_pools[source_index]->allocate_buffers(is_dma_able, num_of_buffers));
-
-    return HAILO_SUCCESS;
+    LOGGER__ERROR("OutputVStream::get_queue_size_accumulators function is not supported when using multi-process service");
+    return m_queue_size_accumulators;
 }
 
-Expected AsyncHwElement::can_push_buffer_upstream(const std::string &source_name)
+AccumulatorPtr OutputVStreamClient::get_pipeline_latency_accumulator() const
 {
-    auto source_index = get_source_index_from_source_name(source_name);
-    CHECK_EXPECTED(source_index);
-    return can_push_buffer_upstream(*source_index);
+    LOGGER__ERROR("OutputVStream::get_pipeline_latency_accumulator function is not supported when using multi-process service");
+    return m_pipeline_latency_accumulator;
 }
 
-hailo_status AsyncHwElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name)
+const std::vector<std::shared_ptr<PipelineElement>> &OutputVStreamClient::get_pipeline() const
 {
-    auto source_index = get_source_index_from_source_name(source_name);
-    CHECK_EXPECTED_AS_STATUS(source_index);
-    return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index);
+    LOGGER__ERROR("OutputVStream::get_pipeline function is not supported when using multi-process service");
+    return m_pipeline;
 }
 
-Expected AsyncHwElement::get_source_index_from_output_stream_name(const std::string &output_stream_name)
+hailo_status OutputVStreamClient::create_client()
 {
-    for (const auto &name_pair : m_source_name_to_stream_name) {
-        if (name_pair.second == output_stream_name) {
-            assert(contains(m_source_name_to_index, name_pair.first));
-            uint32_t ret_val = m_source_name_to_index.at(name_pair.first);
-            return ret_val;
-        }
-    }
-    return make_unexpected(HAILO_NOT_FOUND);
+    auto expected_client = HailoRtRpcClientUtils::create_client();
+    CHECK_EXPECTED_AS_STATUS(expected_client);
+    m_client = expected_client.release();
+    return HAILO_SUCCESS;
 }
 
-Expected AsyncHwElement::get_source_index_from_source_name(const std::string &source_name)
+hailo_status OutputVStreamClient::before_fork()
 {
-    CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND, "couldnt find src '{}'", source_name);
-    auto ret_val = m_source_name_to_index.at(source_name);
-    return ret_val;
+    m_client.reset();
+    return HAILO_SUCCESS;
 }
 
-Expected AsyncHwElement::get_sink_index_from_input_stream_name(const std::string &input_stream_name)
+hailo_status OutputVStreamClient::after_fork_in_parent()
 {
-    for (const auto &name_pair : m_sink_name_to_stream_name) {
-        if (name_pair.second == input_stream_name) {
-            return Expected(m_sink_name_to_index.at(name_pair.first));
-        }
-    }
-    return make_unexpected(HAILO_INVALID_ARGUMENT);
+    return create_client();
 }
 
-Expected AsyncHwElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+hailo_status OutputVStreamClient::after_fork_in_child()
 {
-    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+    return create_client();
 }
 
-std::vector AsyncHwElement::execution_pads()
+bool OutputVStreamClient::is_aborted()
 {
-    std::vector result;
-    result.reserve(m_sources.size());
-    for (auto& pad : m_sources) {
-        result.push_back(pad.next());
+    auto is_aborted_exp = m_client->OutputVStream_is_aborted(m_identifier);
+    if (!is_aborted_exp) {
+        LOGGER__CRITICAL("OutputVStream_is_aborted failed with status={}", is_aborted_exp.status());
+        return true;
     }
-    return result;
+    return
is_aborted_exp.release(); } -hailo_status AsyncHwElement::execute_terminate(hailo_status error_status) +hailo_status OutputVStreamClient::set_nms_score_threshold(float32_t threshold) { - if (m_is_terminated) { - return HAILO_SUCCESS; - } - - if (!m_is_terminating_element) { - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. - std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - } + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto vstream_client = expected_client.release(); - // Checking success of shutdown is best effort (terminate should be called even if shutdown fails) - auto shutdown_status = m_net_group->shutdown(); - auto wait_for_callbacks_finish_status = m_net_group->wait_for_callbacks_finish(); - auto terminate_status = PipelineElement::execute_terminate(error_status); - CHECK_SUCCESS(shutdown_status); - CHECK_SUCCESS(wait_for_callbacks_finish_status); - CHECK_SUCCESS(terminate_status); + CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_score_threshold(m_identifier, threshold)); return HAILO_SUCCESS; } -Expected> CopyBufferElement::create(const std::string &name, - std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) +hailo_status OutputVStreamClient::set_nms_iou_threshold(float32_t threshold) { - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - auto elem_ptr = make_shared_nothrow(name, duration_collector.release(), std::move(pipeline_status), - timeout, pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto vstream_client = expected_client.release(); - LOGGER__INFO("Created {}", elem_ptr->name()); + CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_iou_threshold(m_identifier, threshold)); - return elem_ptr; + return HAILO_SUCCESS; } -CopyBufferElement::CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr, timeout, async_pipeline) -{} - -PipelinePad &CopyBufferElement::next_pad() +hailo_status OutputVStreamClient::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto vstream_client = expected_client.release(); + + CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_max_proposals_per_class(m_identifier, max_proposals_per_class)); + m_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; + + return HAILO_SUCCESS; } -Expected CopyBufferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status OutputVStreamClient::set_nms_max_accumulated_mask_size(uint32_t 
max_accumulated_mask_size) { - CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be passed to CopyBufferElement!"); + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto vstream_client = expected_client.release(); - CHECK_AS_EXPECTED(optional.size() == input.size(), HAILO_INVALID_ARGUMENT, "Optional buffer size does not equal to the input buffer size!"); - memcpy(optional.data(), input.data(), optional.size()); + CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_max_accumulated_mask_size(m_identifier, max_accumulated_mask_size)); + m_info.nms_shape.max_accumulated_mask_size = max_accumulated_mask_size; - return std::move(optional); + return HAILO_SUCCESS; } +#endif // HAILO_SUPPORT_MULTI_PROCESS + Expected, std::vector>> VStreamsBuilder::create_vstreams( ConfiguredNetworkGroup &net_group, bool /*unused*/, hailo_format_type_t format_type, const std::string &network_name) @@ -3273,15 +1472,6 @@ Expected, std::vector>> VStre expected_all_inputs.release(), expected_all_outputs.release()); } -static hailo_vstream_params_t expand_vstream_params_autos(const hailo_stream_info_t &stream_info, - const hailo_vstream_params_t &vstream_params) -{ - auto local_vstream_params = vstream_params; - local_vstream_params.user_buffer_format = HailoRTDefaults::expand_auto_format(vstream_params.user_buffer_format, - stream_info.format); - return local_vstream_params; -} - Expected> VStreamsBuilder::create_input_vstreams(ConfiguredNetworkGroup &net_group, const std::map &inputs_params) { @@ -3294,1201 +1484,4 @@ Expected> VStreamsBuilder::create_output_vstreams(Con return net_group.create_output_vstreams(outputs_params); } -Expected> VStreamsBuilderUtils::create_inputs( - std::vector> input_streams, const hailo_vstream_info_t &vstream_info, - const hailo_vstream_params_t &vstream_params) -{ - CHECK_AS_EXPECTED(!input_streams.empty(), HAILO_INVALID_ARGUMENT, "input streams can't be empty"); - // if input streams has more than 1 value, it will be handled by handle_pix_buffer_splitter_flow. 
For all other purposes, - // assuming there is only 1 stream is valid - std::shared_ptr input_stream = input_streams.front(); - - // TODO (HRT-4522): Support this measurement - CHECK_AS_EXPECTED(!(vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, - "Pipeline FPS statistics measurement is not implemented"); - - std::vector> elements; - std::vector vstreams; - - EventPtr core_op_activated_event = nullptr; - if (!input_stream->is_scheduled()) { - core_op_activated_event = input_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - auto user_timeout = std::chrono::milliseconds(vstream_params.timeout_ms); - - if (input_streams.size() > 1) { - CHECK_SUCCESS_AS_EXPECTED(handle_pix_buffer_splitter_flow(input_streams, vstream_info, - std::move(elements), vstreams, vstream_params, shutdown_event, pipeline_status, core_op_activated_event, - pipeline_latency_accumulator.value())); - } else { - auto hw_write_elem = HwWriteElement::create(input_stream, - PipelineObject::create_element_name("HwWriteElement", input_stream->name(), input_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status); - CHECK_EXPECTED(hw_write_elem); - elements.insert(elements.begin(), hw_write_elem.value()); - - auto should_transform = InputTransformContext::is_transformation_required(input_stream->get_info().shape, - vstream_params.user_buffer_format, input_stream->get_info().hw_shape, input_stream->get_info().format, - input_stream->get_quant_infos()); - CHECK_EXPECTED(should_transform); - - if (should_transform.value()) { - std::shared_ptr elem_after_post_infer = hw_write_elem.value(); - auto queue_elem = PushQueueElement::create( - PipelineObject::create_element_name("PushQueueElement", input_stream->get_info().name, input_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(queue_elem); - elements.insert(elements.begin(), queue_elem.value()); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(queue_elem.value(), hw_write_elem.value())); - - auto pre_infer_elem = PreInferElement::create(input_stream->get_info().shape, vstream_params.user_buffer_format, - input_stream->get_info().hw_shape, input_stream->get_info().format, input_stream->get_quant_infos(), - PipelineObject::create_element_name("PreInferElement", input_stream->get_info().name, input_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(pre_infer_elem); - elements.insert(elements.begin(), pre_infer_elem.value()); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); - - input_stream->set_timeout(user_timeout); - auto vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, pre_infer_elem.release(), hw_write_elem.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - } else { - input_stream->set_timeout(user_timeout); - auto 
vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, hw_write_elem.value(), hw_write_elem.value(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - } - } - - for (const auto &vstream : vstreams) { - LOGGER__INFO("{}", vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::create_outputs(std::shared_ptr output_stream, - NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos) -{ - std::vector> elements; - std::vector vstreams; - - if (0 != (HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_stream->get_info().format.flags)) - { - LOGGER__WARNING("Using legacy implementation of Argmax in host. Please re-compile your model with latest DFC version"); - } - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - assert(!vstreams_params_map.empty()); - - // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the - // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. - hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; - size_t buffer_pool_size = 0; - for (const auto &elem_name_params : vstreams_params_map) { - hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; - hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; - buffer_pool_size += elem_name_params.second.queue_size; - } - - // TODO (HRT-4522): Support this measurement - CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, - "Pipeline FPS statistics measurement is not implemented"); - - auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, - buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); - CHECK_EXPECTED(hw_read_element); - - if (output_stream->get_info().is_mux) { - hailo_status status = add_demux(output_stream, vstreams_params_map, std::move(elements), vstreams, hw_read_element.value(), - shutdown_event, pipeline_status, output_vstream_infos); - CHECK_SUCCESS_AS_EXPECTED(status); - } else { - auto vstream_info = output_vstream_infos.find(output_stream->name()); - CHECK_AS_EXPECTED(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}", output_stream->name()); - assert(1 == vstreams_params_map.size()); - auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), vstreams_params_map.begin()->second); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - auto should_transform = OutputTransformContext::is_transformation_required(output_stream->get_info().hw_shape, - output_stream->get_info().format, 
output_stream->get_info().shape, - vstream_params.user_buffer_format, output_stream->get_quant_infos()); - CHECK_EXPECTED(should_transform); - - if (should_transform.value()) { - auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", - shutdown_event, vstream_params); - CHECK_EXPECTED(hw_read_queue_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); - - auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, - "PostInferElement", vstream_params, shutdown_event); - CHECK_EXPECTED(post_infer_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); - auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, - "UserBufferQueueElement", shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), user_buffer_queue_element.value())); - output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - } else { - output_stream->set_timeout(std::chrono::milliseconds(vstream_params.timeout_ms)); - auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, hw_read_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - } - } - - for (const auto &vstream : vstreams) { - LOGGER__INFO("{}", vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::create_output_post_process_iou(std::shared_ptr output_stream, - hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata) -{ - std::vector> elements; - std::vector vstreams; - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_AS_EXPECTED(shutdown_event_exp, HAILO_OUT_OF_HOST_MEMORY); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - vstream_params.user_buffer_format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, - iou_op_metadata->type()); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, - vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags); - CHECK_EXPECTED(hw_read_element); - - auto hw_read_queue_element = 
add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", - shutdown_event, vstream_params); - CHECK_EXPECTED(hw_read_queue_element); - hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); - - auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, - "PostInferElement", vstream_params, shutdown_event); - CHECK_EXPECTED(post_infer_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); - - auto pre_nms_convert_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_nms_convert", - shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_nms_convert_queue_element.value())); - - auto nms_to_detections_element = add_nms_to_detections_convert_element(output_stream, pipeline_status, elements, "NmsFormatToDetectionsElement", - vstream_params, iou_op_metadata, vstream_params.queue_size, std::chrono::milliseconds(HAILO_INFINITE), vstream_params.vstream_stats_flags, shutdown_event); - CHECK_EXPECTED(nms_to_detections_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_nms_convert_queue_element.value(), nms_to_detections_element.value())); - - auto pre_remove_overlapping_bboxes_element_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_bboxes_removing", - shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(nms_to_detections_element.value(), pre_remove_overlapping_bboxes_element_queue_element.value())); - - auto remove_overlapping_bboxes_element = add_remove_overlapping_bboxes_element(output_stream, pipeline_status, elements, "RemoveOverlappingBboxesElement", - vstream_params, iou_op_metadata, vstream_params.queue_size, std::chrono::milliseconds(HAILO_INFINITE), vstream_params.vstream_stats_flags, shutdown_event); - CHECK_EXPECTED(remove_overlapping_bboxes_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_remove_overlapping_bboxes_element_queue_element.value(), remove_overlapping_bboxes_element.value())); - - auto pre_fill_nms_format_element_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_fill_nms_format", - shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(remove_overlapping_bboxes_element.value(), pre_fill_nms_format_element_queue_element.value())); - - auto fill_nms_format_element = add_fill_nms_format_element(output_stream, pipeline_status, elements, "FillNmsFormatElement", - vstream_params, iou_op_metadata, vstream_params.queue_size, std::chrono::milliseconds(HAILO_INFINITE), vstream_params.vstream_stats_flags, shutdown_event); - CHECK_EXPECTED(fill_nms_format_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_fill_nms_format_element_queue_element.value(), fill_nms_format_element.value())); - - auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, - "UserBufferQueueElement", shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(fill_nms_format_element.value(), user_buffer_queue_element.value())); - output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - - auto output_vstream_info = 
iou_op_metadata->get_output_vstream_info(); - CHECK_EXPECTED(output_vstream_info); - - auto vstream = OutputVStream::create(output_vstream_info.value(), output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - - for (const auto &curr_vstream : vstreams) { - LOGGER__INFO("{}", curr_vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::create_output_post_process_softmax(std::shared_ptr output_stream, - const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, - const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata) -{ - std::vector> elements; - std::vector vstreams; - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - assert(!vstreams_params_map.empty()); - - // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the - // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. - hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; - size_t buffer_pool_size = 0; - for (const auto &elem_name_params : vstreams_params_map) { - hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; - hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; - buffer_pool_size += elem_name_params.second.queue_size; - } - - // TODO (HRT-4522): Support this measurement - CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, - "Pipeline FPS statistics measurement is not implemented"); - - assert(1 == vstreams_params_map.size()); - auto op_input_format = softmax_op_metadata->inputs_metadata().begin()->second.format; - auto vstream_params = vstreams_params_map.begin()->second; - vstream_params.user_buffer_format = net_flow::SoftmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, - buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); - CHECK_EXPECTED(hw_read_element); - - auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", - shutdown_event, vstream_params); - CHECK_EXPECTED(hw_read_queue_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); - - auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, - "PostInferElement", vstream_params, shutdown_event); - 
CHECK_EXPECTED(post_infer_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); - - auto pre_softmax_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_softmax", - shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_softmax_queue_element.value())); - - auto softmax_element = add_softmax_element(output_stream, pipeline_status, elements, "SoftmaxPostProcessElement", - vstream_params, softmax_op_metadata, buffer_pool_size, std::chrono::milliseconds(HAILO_INFINITE), hw_read_stream_stats_flags, shutdown_event); - CHECK_EXPECTED(softmax_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_softmax_queue_element.value(), softmax_element.value())); - auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, - "UserBufferQueueElement", shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(softmax_element.value(), user_buffer_queue_element.value())); - output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - - auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - - for (const auto &curr_vstream : vstreams) { - LOGGER__INFO("{}", curr_vstream.get_pipeline_description()); - } - - return vstreams; -} - -InputVStream VStreamsBuilderUtils::create_input(std::shared_ptr input_vstream) -{ - return InputVStream(std::move(input_vstream)); -} - -OutputVStream VStreamsBuilderUtils::create_output(std::shared_ptr output_vstream) -{ - return OutputVStream(std::move(output_vstream)); -} - -static bool are_formats_equal(const hailo_format_t &format1, const hailo_format_t &format2) { - return ((format1.order == format2.order) && (format1.flags == format2.flags) && (format1.type == format2.type)); -} - -Expected> VStreamsBuilderUtils::create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams, - OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params, - const std::unordered_map &post_process_ops_metadata, - const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map) -{ - auto first_stream_info = output_streams[0]->get_info(); - if ((HailoRTCommon::is_nms(first_stream_info)) && (first_stream_info.nms_info.is_defused)) { - // Case defuse NMS - return create_output_nms(output_streams, vstream_params, output_vstream_infos_map); - } else if (contains(op_inputs_to_op_name, static_cast(first_stream_info.name))) { - // Case post-process on host - auto &op_name = op_inputs_to_op_name.at(first_stream_info.name); - auto &op_metadata = post_process_ops_metadata.at(op_name); - switch (op_metadata->type()) { - case net_flow::OperationType::YOLOX: - case net_flow::OperationType::YOLOV8: - case net_flow::OperationType::SSD: - case net_flow::OperationType::YOLOV5: - case net_flow::OperationType::YOLOV5SEG: - case net_flow::OperationType::IOU: - { - assert(1 <= op_metadata->outputs_metadata().size()); - auto updated_outputs_metadata = op_metadata->outputs_metadata(); - 
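The switch below picks the concrete post-process op from op_metadata->type(), downcasting the shared metadata pointer to its concrete class before calling the matching create(). A condensed, compilable sketch of that dispatch pattern, with two hypothetical op types standing in for the full YOLOX/YOLOV8/YOLOV5/SSD list:

#include <cassert>
#include <iostream>
#include <memory>

enum class OperationType { YOLOV5, SSD };

struct OpMetadata {
    virtual ~OpMetadata() = default;
    virtual OperationType type() const = 0;
};
struct Yolov5OpMetadata : OpMetadata {
    OperationType type() const override { return OperationType::YOLOV5; }
};
struct SSDOpMetadata : OpMetadata {
    OperationType type() const override { return OperationType::SSD; }
};

struct Op { virtual ~Op() = default; virtual const char *name() const = 0; };
struct Yolov5Op : Op { const char *name() const override { return "YOLOv5PostProcessOp"; } };
struct SSDOp : Op { const char *name() const override { return "SSDPostProcessOp"; } };

std::shared_ptr<Op> create_op(const std::shared_ptr<OpMetadata> &metadata)
{
    switch (metadata->type()) {
    case OperationType::YOLOV5: {
        // The downcast is safe because type() identifies the dynamic type.
        auto yolo_metadata = std::dynamic_pointer_cast<Yolov5OpMetadata>(metadata);
        assert(nullptr != yolo_metadata);
        return std::make_shared<Yolov5Op>();
    }
    case OperationType::SSD: {
        auto ssd_metadata = std::dynamic_pointer_cast<SSDOpMetadata>(metadata);
        assert(nullptr != ssd_metadata);
        return std::make_shared<SSDOp>();
    }
    }
    return nullptr; // unreachable with the enum values above
}

int main()
{
    std::shared_ptr<OpMetadata> metadata = std::make_shared<Yolov5OpMetadata>();
    std::cout << create_op(metadata)->name() << "\n"; // YOLOv5PostProcessOp
    return 0;
}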
updated_outputs_metadata.begin()->second.format = - net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, op_metadata->type()); - op_metadata->set_outputs_metadata(updated_outputs_metadata); - CHECK_SUCCESS_AS_EXPECTED(op_metadata->validate_format_info()); - - std::shared_ptr op; - switch (op_metadata->type()) { - case (net_flow::OperationType::YOLOX): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::YOLOXPostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::YOLOV8): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::YOLOV8PostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::YOLOV5): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::YOLOV5SEG): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::Yolov5SegPostProcess::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::SSD): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::SSDPostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::IOU): - { - return create_output_post_process_iou(output_streams[0], vstream_params, op_metadata); - } - default: - break; - } - - return create_output_post_process_nms(output_streams, vstream_params, output_vstream_infos_map, op); - } - - case net_flow::OperationType::ARGMAX: - { - assert(output_streams.size() == 1); - NameToVStreamParamsMap name_to_vstream_params_map; - for (auto &output_stream : all_output_streams) { - if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { - for (auto &vstream : output_stream.second) { - name_to_vstream_params_map.insert(vstream); - } - } - } - auto output_vstream_info = op_metadata->get_output_vstream_info(); - CHECK_EXPECTED(output_vstream_info); - return create_output_post_process_argmax(output_streams[0], name_to_vstream_params_map, output_vstream_info.release(), op_metadata); - } - - case net_flow::OperationType::SOFTMAX: - { - assert(output_streams.size() == 1); - NameToVStreamParamsMap name_to_vstream_params_map; - for (auto &output_stream : all_output_streams) { - if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { - for (auto &vstream : output_stream.second) { - name_to_vstream_params_map.insert(vstream); - } - } - } - auto output_vstream_info = op_metadata->get_output_vstream_info(); - CHECK_EXPECTED(output_vstream_info); - return create_output_post_process_softmax(output_streams[0], name_to_vstream_params_map, output_vstream_info.release(), op_metadata); - } - - default: - LOGGER__ERROR("op type {} of op {} is not in any of the supported post process OP types", net_flow::OpMetadata::get_operation_type_str(op_metadata->type()), op_name); - return make_unexpected(HAILO_INVALID_OPERATION); - } - } else { - // All other cases - 
assert(output_streams.size() == 1); - NameToVStreamParamsMap name_to_vstream_params_map; - for (auto &output_stream : all_output_streams) { - if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { - for (auto &vstream : output_stream.second) { - name_to_vstream_params_map.insert(vstream); - } - } - } - return create_outputs(output_streams[0], name_to_vstream_params_map, output_vstream_infos_map); - } -} - -Expected> VStreamsBuilderUtils::create_output_nms(OutputStreamPtrVector &output_streams, - hailo_vstream_params_t vstreams_params, - const std::map &output_vstream_infos) -{ - for (const auto &out_stream : output_streams) { - CHECK_AS_EXPECTED(are_formats_equal(output_streams[0]->get_info().format, out_stream->get_info().format), - HAILO_INVALID_ARGUMENT, "All nms streams of the same virtual output must have the same format"); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - std::vector> elements; - std::vector vstreams; - - hailo_status status = add_nms_fuse(output_streams, vstreams_params, elements, vstreams, shutdown_event, - pipeline_status, output_vstream_infos); - CHECK_SUCCESS_AS_EXPECTED(status); - - for (const auto &vstream : vstreams) { - LOGGER__INFO("{}", vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::create_output_post_process_nms(OutputStreamPtrVector &output_streams, - hailo_vstream_params_t vstreams_params, - const std::map &output_vstream_infos, - const std::shared_ptr &nms_op) -{ - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - std::vector> elements; - std::vector vstreams; - - hailo_status status = add_nms_post_process(output_streams, vstreams_params, elements, vstreams, shutdown_event, - pipeline_status, output_vstream_infos, nms_op); - CHECK_SUCCESS_AS_EXPECTED(status); - - for (const auto &vstream : vstreams) { - LOGGER__INFO("{}", vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::add_hw_read_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size, - const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags) -{ - auto hw_read_elem = HwReadElement::create(output_stream, - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - HAILO_INFINITE_TIMEOUT, buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED(hw_read_elem); - elements.push_back(hw_read_elem.value()); - return hw_read_elem; -} - -Expected> VStreamsBuilderUtils::add_pull_queue_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params) -{ - auto pull_queue_elem = 
PullQueueElement::create( - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(pull_queue_elem); - elements.push_back(pull_queue_elem.value()); - return pull_queue_elem; -} - -Expected> VStreamsBuilderUtils::add_argmax_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - // Updating metadata according to user request. TODO: HRT-9737 - auto updated_outputs_metadata = argmax_op_metadata.get()->outputs_metadata(); - updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format; - auto metadata = std::dynamic_pointer_cast(argmax_op_metadata); - assert(nullptr != metadata); - metadata->set_outputs_metadata(updated_outputs_metadata); - CHECK_SUCCESS_AS_EXPECTED(metadata->validate_format_info()); - // Updating metadata according to use request. TODO: HRT-9737 - End - - auto op_expected = net_flow::ArgmaxPostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - auto argmax_op = op_expected.release(); - - auto argmax_element = ArgmaxPostProcessElement::create(argmax_op, - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, buffer_pool_size, timeout, vstream_flags, shutdown_event); - CHECK_EXPECTED(argmax_element); - elements.push_back(argmax_element.value()); - return argmax_element; -} - -Expected> VStreamsBuilderUtils::add_softmax_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - // Updating metadata according to user request. TODO: HRT-9737 - // Currently softmax only supports inputs to be float32 and order NHWC or NC - auto updated_inputs_metadata = softmax_op_metadata.get()->inputs_metadata(); - updated_inputs_metadata.begin()->second.format = vstream_params.user_buffer_format; - auto updated_outputs_metadata = softmax_op_metadata.get()->outputs_metadata(); - updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format; - auto metadata = std::dynamic_pointer_cast(softmax_op_metadata); - assert(nullptr != metadata); - metadata->set_outputs_metadata(updated_outputs_metadata); - metadata->set_inputs_metadata(updated_inputs_metadata); - CHECK_SUCCESS_AS_EXPECTED(metadata->validate_format_info()); - // Updating metadata according to use request. 
TODO: HRT-9737 - End - - auto op_expected = net_flow::SoftmaxPostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - auto softmax_op = op_expected.release(); - auto softmax_element = SoftmaxPostProcessElement::create(softmax_op, - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, buffer_pool_size, timeout, vstream_flags, shutdown_event); - CHECK_EXPECTED(softmax_element); - elements.push_back(softmax_element.value()); - return softmax_element; -} - -Expected> VStreamsBuilderUtils::add_nms_to_detections_convert_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto nms_to_detections_element = ConvertNmsToDetectionsElement::create(metadata->nms_info(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, timeout, vstream_flags, shutdown_event, buffer_pool_size); - CHECK_EXPECTED(nms_to_detections_element); - elements.push_back(nms_to_detections_element.value()); - return nms_to_detections_element; -} - -Expected> VStreamsBuilderUtils::add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto remove_overlapping_bboxes_element = RemoveOverlappingBboxesElement::create(metadata->nms_config(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, timeout, vstream_flags, shutdown_event, buffer_pool_size); - CHECK_EXPECTED(remove_overlapping_bboxes_element); - elements.push_back(remove_overlapping_bboxes_element.value()); - return remove_overlapping_bboxes_element; -} - -Expected> VStreamsBuilderUtils::add_fill_nms_format_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_info(), vstream_params.user_buffer_format, metadata->nms_config(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, timeout, vstream_flags, shutdown_event, buffer_pool_size); - CHECK_EXPECTED(fill_nms_format_element); - 
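All of the add_*_element helpers here share one contract: construct the element, append it to the pipeline's element list, and return it so the caller can link its pads. A minimal sketch of that create/register/return pattern, using a bare, hypothetical PipelineElement stand-in instead of the real Expected-returning factories:

#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct PipelineElement {
    explicit PipelineElement(std::string name) : name(std::move(name)) {}
    std::string name;
};

// Construct the element, register it with the pipeline (shared ownership),
// and hand it back so the caller can wire it to its neighbors.
template <typename Element, typename... Args>
std::shared_ptr<Element> add_element(std::vector<std::shared_ptr<PipelineElement>> &elements,
    Args &&...args)
{
    auto element = std::make_shared<Element>(std::forward<Args>(args)...);
    elements.push_back(element); // the pipeline keeps every element alive
    return element;              // returned for pad linking
}

int main()
{
    std::vector<std::shared_ptr<PipelineElement>> elements;
    auto first = add_element<PipelineElement>(elements, "HwReadElement");
    auto second = add_element<PipelineElement>(elements, "PullQueueElement");
    std::cout << elements.size() << " elements registered\n"; // prints 2
    (void)first; (void)second;
    return 0;
}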
elements.push_back(fill_nms_format_element.value()); - return fill_nms_format_element; -} - -Expected> VStreamsBuilderUtils::add_user_buffer_queue_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params) -{ - auto post_argmax_queue_element = UserBufferQueueElement::create( - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(post_argmax_queue_element); - elements.push_back(post_argmax_queue_element.value()); - return post_argmax_queue_element; -} - -Expected> VStreamsBuilderUtils::add_post_infer_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event) -{ - auto post_infer_element = PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format, - output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_quant_infos(), output_stream->get_info().nms_info, - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params, pipeline_status, shutdown_event); - CHECK_EXPECTED(post_infer_element); - elements.push_back(post_infer_element.value()); - return post_infer_element; -} - -Expected> VStreamsBuilderUtils::create_output_post_process_argmax(std::shared_ptr output_stream, - const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, - const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata) -{ - std::vector> elements; - std::vector vstreams; - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - assert(!vstreams_params_map.empty()); - - // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the - // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. 
- hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; - size_t buffer_pool_size = 0; - for (const auto &elem_name_params : vstreams_params_map) { - hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; - hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; - buffer_pool_size += elem_name_params.second.queue_size; - } - - // TODO (HRT-4522): Support this measurement - CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, - "Pipeline FPS statistics measurement is not implemented"); - - auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, - buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); - CHECK_EXPECTED(hw_read_element); - - assert(1 == vstreams_params_map.size()); - auto op_input_format = argmax_op_metadata->inputs_metadata().begin()->second.format; - auto vstream_params = vstreams_params_map.begin()->second; - vstream_params.user_buffer_format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format); - - auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", - shutdown_event, vstream_params); - CHECK_EXPECTED(hw_read_queue_element); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); - - auto argmax_element = add_argmax_element(output_stream, pipeline_status, elements, "ArgmaxPostProcessElement", - vstream_params, argmax_op_metadata, buffer_pool_size, std::chrono::milliseconds(HAILO_INFINITE), hw_read_stream_stats_flags, shutdown_event); - CHECK_EXPECTED(argmax_element); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), argmax_element.value())); - - auto post_argmax_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, - "UserBufferQueueElement_post_argmax", shutdown_event, vstream_params); - CHECK_EXPECTED(post_argmax_queue_element); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(argmax_element.value(), post_argmax_queue_element.value())); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, post_argmax_queue_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - - for (const auto ¤t_vstream : vstreams) { - LOGGER__INFO("{}", current_vstream.get_pipeline_description()); - } - - return vstreams; -} - -hailo_status VStreamsBuilderUtils::handle_pix_buffer_splitter_flow(std::vector> streams, - const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements, - std::vector &vstreams, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event, - AccumulatorPtr accumalator) -{ - // sorting the streams based on their 
plane index -> we count on order to know which plane belongs to which stream - auto compartor = [](std::shared_ptr a, std::shared_ptr b) { - return a->get_layer_info().plane_index < b->get_layer_info().plane_index; - }; - std::sort(streams.begin(), streams.end(), compartor); - - auto duration_collector_expected = DurationCollector::create(vstream_params.pipeline_elements_stats_flags); - CHECK_EXPECTED_AS_STATUS(duration_collector_expected); - - auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufferElement", - vstream_info.name, 0), std::chrono::milliseconds(HAILO_INFINITE), duration_collector_expected.release(), - pipeline_status, vstream_info.format.order); - CHECK_EXPECTED_AS_STATUS(planes_splitter); - base_elements.push_back(planes_splitter.value()); - - uint32_t stream_number = 0; - - for (const auto &stream : streams){ - auto hw_write_elem = HwWriteElement::create(stream, - PipelineObject::create_element_name("HwWriteElement", stream->name(), stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status); - CHECK_EXPECTED_AS_STATUS(hw_write_elem); - base_elements.insert(base_elements.begin(), hw_write_elem.value()); - - auto &stream_info = stream->get_info(); - auto &src_image_shape = stream_info.shape; - auto &dst_image_shape = stream_info.hw_shape; - auto &dst_format = stream_info.format; - auto src_format = vstream_params.user_buffer_format; - /* the format order of each plane (stream) is determined by the stream's order. - type and flags are determined by the vstream params */ - src_format.order = dst_format.order; - auto quant_infos = std::vector{stream_info.quant_info}; - - auto should_transform_expected = InputTransformContext::is_transformation_required(src_image_shape, src_format, - dst_image_shape, dst_format, quant_infos); - CHECK_EXPECTED_AS_STATUS(should_transform_expected); - - if(should_transform_expected.value()){ - auto pre_infer_elem = PreInferElement::create(src_image_shape, src_format, - dst_image_shape, dst_format, quant_infos, PipelineObject::create_element_name( "PreInferElement", - stream->get_info().name, stream->get_info().index), vstream_params, shutdown_event, pipeline_status); - - CHECK_EXPECTED_AS_STATUS(pre_infer_elem); - base_elements.push_back(pre_infer_elem.value()); - - auto queue_elem = PushQueueElement::create( - PipelineObject::create_element_name("PushQueueElement", stream_info.name, stream_info.index), - vstream_params, shutdown_event, pipeline_status); - - CHECK_EXPECTED_AS_STATUS(queue_elem); - base_elements.push_back((queue_elem.value())); - - CHECK_SUCCESS(PipelinePad::link_pads(planes_splitter.value(), pre_infer_elem.value(), stream_number, 0)); - CHECK_SUCCESS(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); - CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), *hw_write_elem)); - } else { - CHECK_SUCCESS(PipelinePad::link_pads(planes_splitter.value(), *hw_write_elem, stream_number, 0)); - - } - stream_number++; - } - - auto vstream = InputVStream::create(vstream_info, { vstream_info.quant_info }, vstream_params, planes_splitter.value(), - nullptr, std::move(base_elements), std::move(pipeline_status), shutdown_event, - core_op_activated_event, accumalator); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - - return HAILO_SUCCESS; -} - -hailo_status VStreamsBuilderUtils::add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, - std::vector> &&base_elements, std::vector &vstreams, - 
std::shared_ptr hw_read_elem, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos) -{ - auto expected_demuxer = OutputDemuxer::create(*output_stream); - CHECK_EXPECTED_AS_STATUS(expected_demuxer); - - std::shared_ptr demuxer_ptr = expected_demuxer.release(); - CHECK(nullptr != demuxer_ptr, HAILO_OUT_OF_HOST_MEMORY); - - auto status = output_stream->set_timeout(HAILO_INFINITE_TIMEOUT); - CHECK_SUCCESS(status); - - // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the - // pipeline_elements_stats_flags for the demux_elem as bitwise or of all the flags. - hailo_pipeline_elem_stats_flags_t demux_elem_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - hailo_vstream_stats_flags_t demux_vstream_stats_flags = HAILO_VSTREAM_STATS_NONE; - size_t buffer_pool_size = 0; - for (const auto &elem_name_params : vstreams_params_map) { - demux_elem_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; - demux_vstream_stats_flags |= elem_name_params.second.vstream_stats_flags; - buffer_pool_size += elem_name_params.second.queue_size; - } - - auto demux_elem = TransformDemuxElement::create(demuxer_ptr, - PipelineObject::create_element_name("TransformDemuxElement", output_stream->name(), output_stream->get_info().index), - std::chrono::milliseconds(HAILO_INFINITE), buffer_pool_size, demux_elem_stats_flags, demux_vstream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(demux_elem); - base_elements.push_back(demux_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem, demux_elem.value())); - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - uint32_t i = 0; - for (auto &edge_info : demuxer_ptr->get_edges_stream_info()) { - auto name_params_pair = vstreams_params_map.find(edge_info.name); - CHECK(name_params_pair != vstreams_params_map.end(), HAILO_NOT_FOUND, - "Failed to find vstreams params of edge {}", edge_info.name); - - const auto vstream_info = output_vstream_infos.find(edge_info.name); - CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}", edge_info.name); - - const auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), name_params_pair->second); - - // For each mux vstream, we create a copy of the previous elements - auto current_vstream_elements = base_elements; - - // For muxed VStreams we use the same pipeline_status for all - auto pipeline_status_copy = pipeline_status; - auto demux_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_demux", edge_info.name, edge_info.index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(demux_queue_elem); - current_vstream_elements.push_back(demux_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(demux_elem.value(), demux_queue_elem.value(), i, 0)); - - CHECK_SUCCESS(demux_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT)); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); - auto should_transform = OutputTransformContext::is_transformation_required(edge_info.hw_shape, - edge_info.format, edge_info.shape, vstream_params.user_buffer_format, std::vector{edge_info.quant_info}); // TODO: Get quant vector (HRT-11077) - 
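Each demuxed edge is checked with is_transformation_required before choosing between a PostInferElement chain and a plain CopyBufferElement. Roughly, a transformation is needed whenever reading the raw hardware buffer as-is would not match what the user asked for. A simplified standalone illustration with hypothetical stand-in types; the real check also considers NMS info and per-feature quant vectors:

#include <cstdint>
#include <iostream>

struct Shape { uint32_t height, width, features; };
struct Format { int order; int type; };
struct QuantInfo { float scale; float zero_point; };

static bool operator==(const Shape &a, const Shape &b)
{
    return (a.height == b.height) && (a.width == b.width) && (a.features == b.features);
}

// A transformation is required on a shape mismatch (e.g. padded hw_shape),
// a format order/type mismatch, or a non-identity quantization.
bool is_transformation_required(const Shape &src_shape, const Format &src_format,
    const Shape &dst_shape, const Format &dst_format, const QuantInfo &quant)
{
    // Exact float compare is fine here: identity quant is stored exactly.
    const bool identity_quant = (1.0f == quant.scale) && (0.0f == quant.zero_point);
    return !(src_shape == dst_shape) || (src_format.order != dst_format.order) ||
        (src_format.type != dst_format.type) || !identity_quant;
}

int main()
{
    Shape hw{8, 8, 4}, user{8, 8, 3}; // hw features padded to 4
    Format f{0, 0};
    std::cout << std::boolalpha
              << is_transformation_required(hw, f, user, f, {1.0f, 0.0f}) << "\n"; // true
    return 0;
}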
CHECK_EXPECTED_AS_STATUS(should_transform); - - if (should_transform.value()) { - auto post_infer_elem = PostInferElement::create(edge_info.hw_shape, edge_info.format, - edge_info.shape, vstream_params.user_buffer_format, { edge_info.quant_info }, edge_info.nms_info, // TODO: Get quant vector (HRT-11077) - PipelineObject::create_element_name("PostInferElement", edge_info.name, edge_info.index), - vstream_params, pipeline_status, shutdown_event); - CHECK_EXPECTED_AS_STATUS(post_infer_elem); - current_vstream_elements.push_back(post_infer_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), post_infer_elem.value())); - - auto post_infer_queue_elem = UserBufferQueueElement::create( - PipelineObject::create_element_name("UserBufferQueueElement_post_infer", edge_info.name, edge_info.index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(post_infer_queue_elem); - current_vstream_elements.push_back(post_infer_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); - - // TODO: Replace output_stream->get_quant_infos() with mux quant info - auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, post_infer_queue_elem.release(), std::move(current_vstream_elements), // TODO: Get quant vector (HRT-11077) - std::move(pipeline_status_copy), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - } else { - // TODO: HRT-4179 - auto user_copy_elem = CopyBufferElement::create( - PipelineObject::create_element_name("CopyBufferElement", edge_info.name, edge_info.index), - pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms)); - CHECK_EXPECTED_AS_STATUS(user_copy_elem); - current_vstream_elements.push_back(user_copy_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), user_copy_elem.value())); - - // TODO: Replace output_stream->get_quant_infos() with mux quant info - auto vstream = OutputVStream::create(vstream_info->second, { edge_info.quant_info }, vstream_params, user_copy_elem.release(), std::move(current_vstream_elements), // TODO: Get quant vector (HRT-11077) - std::move(pipeline_status_copy), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - } - i++; - } - return HAILO_SUCCESS; -} - -hailo_status VStreamsBuilderUtils::add_nms_fuse(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, - std::vector> &elements, std::vector &vstreams, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos) -{ - std::vector nms_infos; - nms_infos.reserve(output_streams.size()); - for (const auto &out_stream : output_streams) { - CHECK(out_stream->get_info().nms_info.defuse_info.class_group_index <= output_streams.size(), - HAILO_INVALID_ARGUMENT, "Not all defused nms outputs were grouped correctly!"); - nms_infos.emplace_back(out_stream->get_info().nms_info); - } - - // To get the fused layer name and src stream format, we use the stream info of one of the defuses - auto first_defused_stream_info = output_streams[0]->get_info(); - auto fused_layer_name = first_defused_stream_info.nms_info.defuse_info.original_name; - auto src_stream_format = first_defused_stream_info.format; - - auto vstream_info = 
output_vstream_infos.find(fused_layer_name); - CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}. Could be due to use of old HEF. Try to re-compile network with newer Dataflow Compiler version", fused_layer_name); - - vstreams_params = expand_vstream_params_autos(first_defused_stream_info, vstreams_params); - auto nms_elem = NmsMuxElement::create(nms_infos, - PipelineObject::create_element_name("NmsMuxElement", fused_layer_name, 0), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_elem); - auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); - - for (uint32_t i = 0; i < output_streams.size(); ++i) { - const auto &curr_stream_info = output_streams[i]->get_info(); - output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT); - - auto hw_read_elem = HwReadElement::create(output_streams[i], - PipelineObject::create_element_name("HwReadElement", curr_stream_info.name, curr_stream_info.index), - HAILO_INFINITE_TIMEOUT, vstreams_params.queue_size, vstreams_params.pipeline_elements_stats_flags, - vstreams_params.vstream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(hw_read_elem); - elements.push_back(hw_read_elem.value()); - - auto nms_source_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); - elements.push_back(nms_source_queue_elem.value()); - nms_source_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); - CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem.value(), nms_source_queue_elem.value())); - CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); - } - elements.push_back(nms_elem.value()); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstreams_params); - CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); - - auto should_transform = OutputTransformContext::is_transformation_required({}, src_stream_format, {}, - vstreams_params.user_buffer_format, std::vector{vstream_info->second.quant_info}); // TODO: Get quant vector (HRT-11078) - CHECK_EXPECTED_AS_STATUS(should_transform); - - EventPtr core_op_activated_event = nullptr; - if (!output_streams[0]->is_scheduled()) { - core_op_activated_event = output_streams[0]->get_core_op_activated_event(); - } - - if (should_transform.value()) { - auto nms_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_nms", fused_layer_name, 0), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_queue_elem); - nms_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); - elements.push_back(nms_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(nms_elem.value(), nms_queue_elem.value())); - - auto post_infer_elem = PostInferElement::create({}, src_stream_format, - {}, vstreams_params.user_buffer_format, { vstream_info->second.quant_info }, fused_layer_nms_info, // TODO: Get quant vector (HRT-11078) - PipelineObject::create_element_name("PostInferElement", fused_layer_name, 0), vstreams_params, pipeline_status, - shutdown_event); - CHECK_EXPECTED_AS_STATUS(post_infer_elem); - - elements.push_back(post_infer_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(nms_queue_elem.value(), post_infer_elem.value())); - - auto post_infer_queue_elem = UserBufferQueueElement::create( 
- PipelineObject::create_element_name("UserBufferQueueElement_post_infer", fused_layer_name, 0), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(post_infer_queue_elem); - elements.push_back(post_infer_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); - - // TODO: Check with SDK where should we take the quant infos from (output_streams[0]->get_quant_infos() might be good) (HRT-11078) - auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, post_infer_queue_elem.release(), std::move(elements), // TODO: Get quant vector (HRT-11078) - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - } else { - // TODO: Check with SDK where should we take the quant infos from (output_streams[0]->get_quant_infos() might be good) (HRT-11078) - auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, nms_elem.release(), std::move(elements), // TODO: Get quant vector (HRT-11078) - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - } - - return HAILO_SUCCESS; -} - -hailo_status VStreamsBuilderUtils::add_nms_post_process(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, - std::vector> &elements, std::vector &vstreams, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos, - const std::shared_ptr &nms_op) -{ - auto first_stream_info = output_streams[0]->get_info(); - vstreams_params.user_buffer_format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type( - vstreams_params.user_buffer_format, nms_op->metadata()->type()); - CHECK(vstreams_params.user_buffer_format.type == HAILO_FORMAT_TYPE_FLOAT32, HAILO_INVALID_ARGUMENT, - "NMS output format type must be HAILO_FORMAT_TYPE_FLOAT32"); - CHECK(HailoRTCommon::is_nms(vstreams_params.user_buffer_format.order), HAILO_INVALID_ARGUMENT, - "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); - - std::unordered_map inputs_metadata; - std::unordered_map outputs_metadata; - for (uint32_t i = 0; i < output_streams.size(); ++i) { - const auto &curr_stream_info = output_streams[i]->get_info(); - net_flow::BufferMetaData input_metadata = { - curr_stream_info.shape, - curr_stream_info.hw_shape, - curr_stream_info.format, - curr_stream_info.quant_info - }; - inputs_metadata.insert({curr_stream_info.name, input_metadata}); - } - - const auto &output_pads = nms_op->outputs_metadata(); - assert(output_pads.size() == 1); - auto vstream_info = output_vstream_infos.find(output_pads.begin()->first); - CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}", nms_op->metadata()->get_name()); - net_flow::BufferMetaData output_metadata = { - vstream_info->second.shape, - vstream_info->second.shape, - vstream_info->second.format, - vstream_info->second.quant_info - }; - outputs_metadata.insert({vstream_info->first, output_metadata}); - - auto op_metadata = std::dynamic_pointer_cast(nms_op->metadata()); - assert(nullptr != op_metadata); - auto nms_elem = NmsPostProcessMuxElement::create(nms_op, - 
PipelineObject::create_element_name("NmsPostProcessMuxElement", nms_op->get_name(), 0), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_elem); - - hailo_format_t nms_src_format; - nms_src_format.flags = HAILO_FORMAT_FLAGS_NONE; - nms_src_format.order = HAILO_FORMAT_ORDER_NHCW; - nms_src_format.type = first_stream_info.format.type; - - for (uint32_t i = 0; i < output_streams.size(); ++i) { - const auto &curr_stream_info = output_streams[i]->get_info(); - output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT); - - auto should_transform = OutputTransformContext::is_transformation_required(curr_stream_info.hw_shape, curr_stream_info.format, - curr_stream_info.hw_shape, nms_src_format, output_streams[i]->get_quant_infos()); - CHECK_EXPECTED_AS_STATUS(should_transform); - - CHECK(!(should_transform.value()), HAILO_INVALID_ARGUMENT, "Unexpected transformation required for {}", curr_stream_info.name); - - auto hw_read_elem = HwReadElement::create(output_streams[i], - PipelineObject::create_element_name("HwReadElement", curr_stream_info.name, curr_stream_info.index), - HAILO_INFINITE_TIMEOUT, vstreams_params.queue_size, vstreams_params.pipeline_elements_stats_flags, - vstreams_params.vstream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(hw_read_elem); - elements.push_back(hw_read_elem.value()); - - auto nms_source_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); - nms_source_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); - elements.push_back(nms_source_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem.value(), nms_source_queue_elem.value())); - CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); - nms_elem.value()->add_sink_name(curr_stream_info.name); - } - elements.push_back(nms_elem.value()); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstreams_params); - CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); - - EventPtr core_op_activated_event = nullptr; - if (!output_streams[0]->is_scheduled()) { - core_op_activated_event = output_streams[0]->get_core_op_activated_event(); - } - - // If user uses HailoRT++ we can assume he won't use Output Scale by Feature - auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, nms_elem.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - - return HAILO_SUCCESS; -} - -Expected VStreamsBuilderUtils::create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params) -{ - AccumulatorPtr pipeline_latency_accumulator = nullptr; - const auto measure_latency = ((vstreams_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_LATENCY) != 0); - if (measure_latency) { - pipeline_latency_accumulator = make_shared_nothrow>("latency"); - CHECK_AS_EXPECTED(nullptr != pipeline_latency_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - return pipeline_latency_accumulator; -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp new file mode 
100644
index 00000000..522726af
--- /dev/null
+++ b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp
@@ -0,0 +1,1291 @@
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file vstream_builder.cpp
+ * @brief Vstream builder impl
+ **/
+
+#include "vstream_builder.hpp"
+#include "hailo/vstream.hpp"
+#include "net_flow/ops/nms_post_process.hpp"
+#include "net_flow/ops/ssd_post_process.hpp"
+#include "net_flow/ops/yolox_post_process.hpp"
+#include "net_flow/ops/yolov8_post_process.hpp"
+#include "net_flow/ops/yolov5_post_process.hpp"
+#include "net_flow/ops/yolov5_bbox_only_post_process.hpp"
+#include "net_flow/ops/argmax_post_process.hpp"
+#include "net_flow/ops/softmax_post_process.hpp"
+#include "net_flow/ops/yolov5_seg_post_process.hpp"
+#include "common/runtime_statistics_internal.hpp"
+
+namespace hailort
+{
+Expected<std::vector<InputVStream>> VStreamsBuilderUtils::create_inputs(
+    std::vector<std::shared_ptr<InputStreamBase>> input_streams, const hailo_vstream_info_t &vstream_info,
+    const hailo_vstream_params_t &vstream_params)
+{
+    CHECK_AS_EXPECTED(!input_streams.empty(), HAILO_INVALID_ARGUMENT, "input streams can't be empty");
+    // If input_streams has more than one element, it will be handled by handle_pix_buffer_splitter_flow.
+    // For all other purposes, assuming there is only one stream is valid.
+    std::shared_ptr<InputStreamBase> input_stream = input_streams.front();
+
+    // TODO (HRT-4522): Support this measurement
+    CHECK_AS_EXPECTED(!(vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED,
+        "Pipeline FPS statistics measurement is not implemented");
+
+    std::vector<std::shared_ptr<PipelineElement>> elements;
+    std::vector<InputVStream> vstreams;
+
+    EventPtr core_op_activated_event = nullptr;
+    if (!input_stream->is_scheduled()) {
+        core_op_activated_event = input_stream->get_core_op_activated_event();
+    }
+
+    auto pipeline_status = make_shared_nothrow<std::atomic<hailo_status>>(HAILO_SUCCESS);
+    CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY);
+
+    auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params);
+    CHECK_EXPECTED(pipeline_latency_accumulator);
+
+    auto user_timeout = std::chrono::milliseconds(vstream_params.timeout_ms);
+
+    if (input_streams.size() > 1) {
+        CHECK_SUCCESS_AS_EXPECTED(handle_pix_buffer_splitter_flow(input_streams, vstream_info,
+            std::move(elements), vstreams, vstream_params, pipeline_status, core_op_activated_event,
+            pipeline_latency_accumulator.value()));
+    } else {
+        auto hw_write_elem = HwWriteElement::create(input_stream,
+            PipelineObject::create_element_name("HwWriteEl", input_stream->name(), input_stream->get_info().index),
+            vstream_params.pipeline_elements_stats_flags, pipeline_status);
+        CHECK_EXPECTED(hw_write_elem);
+        elements.insert(elements.begin(), hw_write_elem.value());
+
+        auto should_transform = InputTransformContext::is_transformation_required(input_stream->get_info().shape,
+            vstream_params.user_buffer_format, input_stream->get_info().hw_shape, input_stream->get_info().format,
+            input_stream->get_quant_infos());
+        CHECK_EXPECTED(should_transform);
+
+        if (should_transform.value()) {
+            auto queue_elem = PushQueueElement::create(
+                PipelineObject::create_element_name("PushQEl", input_stream->get_info().name, input_stream->get_info().index),
+                vstream_params, input_stream->get_info().hw_frame_size, pipeline_status);
+            CHECK_EXPECTED(queue_elem);
+            elements.insert(elements.begin(), queue_elem.value());
+            CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(queue_elem.value(), 
hw_write_elem.value())); + + auto pre_infer_elem = PreInferElement::create(input_stream->get_info().shape, vstream_params.user_buffer_format, + input_stream->get_info().hw_shape, input_stream->get_info().format, input_stream->get_quant_infos(), + PipelineObject::create_element_name("PreInferEl", input_stream->get_info().name, input_stream->get_info().index), + vstream_params, pipeline_status); + CHECK_EXPECTED(pre_infer_elem); + elements.insert(elements.begin(), pre_infer_elem.value()); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); + + input_stream->set_timeout(user_timeout); + auto vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, pre_infer_elem.release(), + hw_write_elem.release(), std::move(elements), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + } else { + input_stream->set_timeout(user_timeout); + auto vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, hw_write_elem.value(), hw_write_elem.value(), + std::move(elements), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + } + } + + for (const auto &vstream : vstreams) { + LOGGER__INFO("{}", vstream.get_pipeline_description()); + } + + return vstreams; +} + +static hailo_vstream_params_t expand_vstream_params_autos(const hailo_stream_info_t &stream_info, + const hailo_vstream_params_t &vstream_params) +{ + auto local_vstream_params = vstream_params; + local_vstream_params.user_buffer_format = HailoRTDefaults::expand_auto_format(vstream_params.user_buffer_format, + stream_info.format); + return local_vstream_params; +} + +Expected> VStreamsBuilderUtils::create_outputs(std::shared_ptr output_stream, + NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos) +{ + std::vector> elements; + std::vector vstreams; + + if (0 != (HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_stream->get_info().format.flags)) + { + LOGGER__WARNING("Using legacy implementation of Argmax in host. Please re-compile your model with latest DFC version"); + } + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + assert(!vstreams_params_map.empty()); + + // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the + // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. 
+ hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; + size_t buffer_pool_size = 0; + for (const auto &elem_name_params : vstreams_params_map) { + hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; + hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; + buffer_pool_size += elem_name_params.second.queue_size; + } + + // TODO (HRT-4522): Support this measurement + CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, + "Pipeline FPS statistics measurement is not implemented"); + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = hw_read_element_stats_flags; + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE); + build_params.shutdown_event = nullptr; + build_params.vstream_stats_flags = hw_read_stream_stats_flags; + build_params.buffer_pool_size_edges = buffer_pool_size; + + auto hw_read_element = add_hw_read_element(output_stream, elements, "HwReadEl", build_params); + CHECK_EXPECTED(hw_read_element); + + if (output_stream->get_info().is_mux) { + hailo_status status = add_demux(output_stream, vstreams_params_map, std::move(elements), vstreams, hw_read_element.value(), + pipeline_status, output_vstream_infos); + CHECK_SUCCESS_AS_EXPECTED(status); + } else { + auto vstream_info = output_vstream_infos.find(output_stream->name()); + CHECK_AS_EXPECTED(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, + "Failed to find vstream info of {}", output_stream->name()); + assert(1 == vstreams_params_map.size()); + auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), vstreams_params_map.begin()->second); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); + CHECK_EXPECTED(pipeline_latency_accumulator); + + auto should_transform = OutputTransformContext::is_transformation_required(output_stream->get_info().hw_shape, + output_stream->get_info().format, output_stream->get_info().shape, + vstream_params.user_buffer_format, output_stream->get_quant_infos()); + CHECK_EXPECTED(should_transform); + + if (should_transform.value()) { + auto pull_queue = PullQueueElement::create( + PipelineObject::create_element_name("PullQEl_hw_read", output_stream->name(), output_stream->get_info().index), + build_params.timeout, buffer_pool_size, output_stream->get_frame_size(), + hw_read_element_stats_flags, hw_read_stream_stats_flags, pipeline_status); + CHECK_EXPECTED(pull_queue); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), pull_queue.value())); + elements.push_back(pull_queue.value()); + + auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, + "PostInferEl", vstream_params); + CHECK_EXPECTED(post_infer_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pull_queue.value(), post_infer_element.value())); + + auto post_transform_frame_size = HailoRTCommon::get_frame_size(vstream_info->second, vstream_params.user_buffer_format); + auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBuffQEl", vstream_params, post_transform_frame_size); + CHECK_EXPECTED(user_buffer_queue_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), user_buffer_queue_element.value())); + + 
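The aggregation loop near the top of create_outputs folds the per-vstream flags into one value with bitwise-or, because the single shared hw-read element must honor every consumer's statistics request, while queue sizes are summed since each consumer contributes its own buffering demand. The same logic in a compilable miniature, with hypothetical flag values:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Hypothetical, simplified vstream params: only the fields the aggregation reads.
struct VStreamParams {
    uint32_t pipeline_elements_stats_flags;
    uint32_t vstream_stats_flags;
    size_t queue_size;
};

int main()
{
    std::map<std::string, VStreamParams> params_map = {
        {"out0", {0x1 /* latency-like flag */, 0x0, 2}},
        {"out1", {0x2 /* queue-size-like flag */, 0x4, 3}},
    };

    uint32_t elem_flags = 0, stream_flags = 0;
    size_t buffer_pool_size = 0;
    for (const auto &name_params : params_map) {
        elem_flags |= name_params.second.pipeline_elements_stats_flags; // union of requests
        stream_flags |= name_params.second.vstream_stats_flags;
        buffer_pool_size += name_params.second.queue_size;              // demands add up
    }
    std::cout << std::hex << elem_flags << " " << stream_flags << std::dec
              << " " << buffer_pool_size << "\n"; // prints "3 4 5"
    return 0;
}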
output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + pull_queue->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + } else { + auto post_transform_frame_size = HailoRTCommon::get_frame_size(vstream_info->second, vstream_params.user_buffer_format); + auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBuffQEl", vstream_params, post_transform_frame_size); + CHECK_EXPECTED(user_buffer_queue_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), user_buffer_queue_element.value())); + + output_stream->set_timeout(std::chrono::milliseconds(vstream_params.timeout_ms)); + auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + } + } + + for (const auto &vstream : vstreams) { + LOGGER__INFO("{}", vstream.get_pipeline_description()); + } + + return vstreams; +} + +Expected> VStreamsBuilderUtils::create_output_post_process_iou(std::shared_ptr output_stream, + hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata) +{ + std::vector> elements; + std::vector vstreams; + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + auto nms_metadata = std::dynamic_pointer_cast(iou_op_metadata); + assert(nullptr != nms_metadata); + + vstream_params.user_buffer_format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, + iou_op_metadata->type(), nms_metadata->nms_config().bbox_only); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); + CHECK_EXPECTED(pipeline_latency_accumulator); + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = vstream_params.pipeline_elements_stats_flags; + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE); + build_params.shutdown_event = nullptr; + build_params.vstream_stats_flags = vstream_params.vstream_stats_flags; + build_params.buffer_pool_size_edges = vstream_params.queue_size; + + auto hw_read_element = add_hw_read_element(output_stream, elements, "HwReadEl", build_params); + CHECK_EXPECTED(hw_read_element); + + auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_hw_read", + vstream_params, output_stream->get_frame_size()); + CHECK_EXPECTED(hw_read_queue_element); + hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); + + auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, 
elements,
+        "PostInferEl", vstream_params);
+    CHECK_EXPECTED(post_infer_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value()));
+
+    auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(output_stream->get_info().nms_info, vstream_params.user_buffer_format);
+    auto pre_nms_convert_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_pre_nms_convert",
+        vstream_params, post_transform_frame_size);
+    CHECK_EXPECTED(pre_nms_convert_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_nms_convert_queue_element.value()));
+
+    auto nms_to_detections_element = add_nms_to_detections_convert_element(output_stream, elements, "NmsFormatToDetectionsEl",
+        iou_op_metadata, build_params);
+    CHECK_EXPECTED(nms_to_detections_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_nms_convert_queue_element.value(), nms_to_detections_element.value()));
+
+    auto pre_remove_overlapping_bboxes_element_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_pre_bboxes_removing",
+        vstream_params, 0);
+    CHECK_EXPECTED(pre_remove_overlapping_bboxes_element_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(nms_to_detections_element.value(), pre_remove_overlapping_bboxes_element_queue_element.value()));
+
+    auto remove_overlapping_bboxes_element = add_remove_overlapping_bboxes_element(output_stream, elements, "RemoveOverlappingBboxesEl",
+        iou_op_metadata, build_params);
+    CHECK_EXPECTED(remove_overlapping_bboxes_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_remove_overlapping_bboxes_element_queue_element.value(), remove_overlapping_bboxes_element.value()));
+
+    auto pre_fill_nms_format_element_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQElt_pre_fill_nms_format",
+        vstream_params, 0);
+    CHECK_EXPECTED(pre_fill_nms_format_element_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(remove_overlapping_bboxes_element.value(), pre_fill_nms_format_element_queue_element.value()));
+
+    auto fill_nms_format_element = add_fill_nms_format_element(output_stream, elements, "FillNmsFormatEl",
+        iou_op_metadata, build_params);
+    CHECK_EXPECTED(fill_nms_format_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_fill_nms_format_element_queue_element.value(), fill_nms_format_element.value()));
+
+    auto output_vstream_info = iou_op_metadata->get_output_vstream_info();
+    CHECK_EXPECTED(output_vstream_info);
+    const auto final_frame_size = HailoRTCommon::get_frame_size(*output_vstream_info, vstream_params.user_buffer_format);
+
+    auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements,
+        "UserBuffQEl", vstream_params, final_frame_size);
+    CHECK_EXPECTED(user_buffer_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(fill_nms_format_element.value(), user_buffer_queue_element.value()));
+    output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+
+    auto vstream = OutputVStream::create(output_vstream_info.value(), output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(),
+        std::move(elements), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release());
+    CHECK_EXPECTED(vstream);
+    vstreams.emplace_back(vstream.release());
+
+    for (const auto &curr_vstream : vstreams) {
+        LOGGER__INFO("{}", curr_vstream.get_pipeline_description());
+    }
+
+    return vstreams;
+}
+
+Expected> VStreamsBuilderUtils::create_output_post_process_softmax(std::shared_ptr output_stream,
+    const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info,
+    const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata)
+{
+    std::vector> elements;
+    std::vector vstreams;
+
+    EventPtr core_op_activated_event = nullptr;
+    if (!output_stream->is_scheduled()) {
+        core_op_activated_event = output_stream->get_core_op_activated_event();
+    }
+
+    auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS);
+    CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY);
+
+    assert(!vstreams_params_map.empty());
+
+    // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the
+    // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags.
+    hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE;
+    hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE;
+    size_t buffer_pool_size = 0;
+    for (const auto &elem_name_params : vstreams_params_map) {
+        hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags;
+        hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags;
+        buffer_pool_size += elem_name_params.second.queue_size;
+    }
+
+    // TODO (HRT-4522): Support this measurement
+    CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED,
+        "Pipeline FPS statistics measurement is not implemented");
+
+    assert(1 == vstreams_params_map.size());
+    auto op_input_format = softmax_op_metadata->inputs_metadata().begin()->second.format;
+    auto vstream_params = vstreams_params_map.begin()->second;
+    vstream_params.user_buffer_format = net_flow::SoftmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format);
+
+    auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params);
+    CHECK_EXPECTED(pipeline_latency_accumulator);
+
+    ElementBuildParams build_params{};
+    build_params.elem_stats_flags = hw_read_element_stats_flags;
+    build_params.pipeline_status = pipeline_status;
+    build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE);
+    build_params.vstream_stats_flags = hw_read_stream_stats_flags;
+    build_params.shutdown_event = nullptr;
+    build_params.buffer_pool_size_edges = buffer_pool_size;
+
+    auto hw_read_element = add_hw_read_element(output_stream, elements, "HwReadEl", build_params);
+    CHECK_EXPECTED(hw_read_element);
+
+    auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_hw_read",
+        vstream_params, output_stream->get_frame_size());
+    CHECK_EXPECTED(hw_read_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value()));
+
+    auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements,
+        "PostInferEl", vstream_params);
+    CHECK_EXPECTED(post_infer_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value()));
+
+    auto post_transform_frame_size = HailoRTCommon::get_frame_size(output_vstream_info, vstream_params.user_buffer_format);
+
+    auto pre_softmax_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_pre_softmax",
+        vstream_params, post_transform_frame_size);
+    CHECK_EXPECTED(pre_softmax_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_softmax_queue_element.value()));
+
+    auto softmax_element = add_softmax_element(output_stream,
elements, "SoftmaxPPEl", vstream_params, softmax_op_metadata, build_params); + CHECK_EXPECTED(softmax_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_softmax_queue_element.value(), softmax_element.value())); + + auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBuffQEl", vstream_params, post_transform_frame_size); + CHECK_EXPECTED(user_buffer_queue_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(softmax_element.value(), user_buffer_queue_element.value())); + + output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + + auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + + for (const auto &curr_vstream : vstreams) { + LOGGER__INFO("{}", curr_vstream.get_pipeline_description()); + } + + return vstreams; +} + +InputVStream VStreamsBuilderUtils::create_input(std::shared_ptr input_vstream) +{ + return InputVStream(std::move(input_vstream)); +} + +OutputVStream VStreamsBuilderUtils::create_output(std::shared_ptr output_vstream) +{ + return OutputVStream(std::move(output_vstream)); +} + +static bool are_formats_equal(const hailo_format_t &format1, const hailo_format_t &format2) { + return ((format1.order == format2.order) && (format1.flags == format2.flags) && (format1.type == format2.type)); +} + +Expected> VStreamsBuilderUtils::create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams, + OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params, + const std::unordered_map &post_process_ops_metadata, + const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map) +{ + auto first_stream_info = output_streams[0]->get_info(); + if ((HailoRTCommon::is_nms(first_stream_info)) && (first_stream_info.nms_info.is_defused)) { + // Case defuse NMS + return create_output_nms(output_streams, vstream_params, output_vstream_infos_map); + } else if (contains(op_inputs_to_op_name, static_cast(first_stream_info.name))) { + // Case post-process on host + auto &op_name = op_inputs_to_op_name.at(first_stream_info.name); + auto &op_metadata = post_process_ops_metadata.at(op_name); + switch (op_metadata->type()) { + case net_flow::OperationType::YOLOX: + case net_flow::OperationType::YOLOV8: + case net_flow::OperationType::SSD: + case net_flow::OperationType::YOLOV5: + case net_flow::OperationType::YOLOV5SEG: + case net_flow::OperationType::IOU: + { + assert(1 <= op_metadata->outputs_metadata().size()); + auto updated_outputs_metadata = op_metadata->outputs_metadata(); + auto nms_metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != nms_metadata); + updated_outputs_metadata.begin()->second.format = + net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, op_metadata->type(), + nms_metadata->nms_config().bbox_only); + + op_metadata->set_outputs_metadata(updated_outputs_metadata); + CHECK_SUCCESS_AS_EXPECTED(op_metadata->validate_format_info()); + std::shared_ptr op; + switch (op_metadata->type()) { + case (net_flow::OperationType::YOLOX): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + 
assert(nullptr != metadata); + auto op_expected = net_flow::YOLOXPostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::YOLOV8): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::YOLOV8PostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::YOLOV5): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + if (metadata->nms_config().bbox_only) { + auto bbox_only_metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != bbox_only_metadata); + auto op_expected = net_flow::YOLOv5BboxOnlyPostProcessOp::create(bbox_only_metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } else { + auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + } + case (net_flow::OperationType::YOLOV5SEG): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::Yolov5SegPostProcess::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::SSD): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::SSDPostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::IOU): + { + return create_output_post_process_iou(output_streams[0], vstream_params, op_metadata); + } + default: + break; + } + + return create_output_post_process_nms(output_streams, vstream_params, output_vstream_infos_map, op); + } + + case net_flow::OperationType::ARGMAX: + { + assert(output_streams.size() == 1); + NameToVStreamParamsMap name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (strncmp(output_stream.first->get_info().name, output_streams[0]->get_info().name, HAILO_MAX_STREAM_NAME_SIZE) == 0) { + for (auto &vstream : output_stream.second) { + name_to_vstream_params_map.insert(vstream); + } + } + } + auto output_vstream_info = op_metadata->get_output_vstream_info(); + CHECK_EXPECTED(output_vstream_info); + return create_output_post_process_argmax(output_streams[0], name_to_vstream_params_map, output_vstream_info.release(), op_metadata); + } + + case net_flow::OperationType::SOFTMAX: + { + assert(output_streams.size() == 1); + NameToVStreamParamsMap name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (strncmp(output_stream.first->get_info().name, output_streams[0]->get_info().name, HAILO_MAX_STREAM_NAME_SIZE) == 0) { + for (auto &vstream : output_stream.second) { + name_to_vstream_params_map.insert(vstream); + } + } + } + auto output_vstream_info = op_metadata->get_output_vstream_info(); + CHECK_EXPECTED(output_vstream_info); + return create_output_post_process_softmax(output_streams[0], name_to_vstream_params_map, output_vstream_info.release(), op_metadata); + } + + default: + LOGGER__ERROR("op type {} of op {} is not in any of the supported post process OP types", net_flow::OpMetadata::get_operation_type_str(op_metadata->type()), op_name); + return make_unexpected(HAILO_INVALID_OPERATION); + } + } else { + // All other cases + assert(output_streams.size() == 1); + NameToVStreamParamsMap 
name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (strncmp(output_stream.first->get_info().name, output_streams[0]->get_info().name, HAILO_MAX_STREAM_NAME_SIZE) == 0) { + for (auto &vstream : output_stream.second) { + name_to_vstream_params_map.insert(vstream); + } + } + } + return create_outputs(output_streams[0], name_to_vstream_params_map, output_vstream_infos_map); + } +} + +Expected> VStreamsBuilderUtils::create_output_nms(OutputStreamPtrVector &output_streams, + hailo_vstream_params_t vstreams_params, + const std::map &output_vstream_infos) +{ + for (const auto &out_stream : output_streams) { + CHECK_AS_EXPECTED(are_formats_equal(output_streams[0]->get_info().format, out_stream->get_info().format), + HAILO_INVALID_ARGUMENT, "All nms streams of the same virtual output must have the same format"); + } + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + std::vector> elements; + std::vector vstreams; + + hailo_status status = add_nms_fuse(output_streams, vstreams_params, elements, vstreams, + pipeline_status, output_vstream_infos); + CHECK_SUCCESS_AS_EXPECTED(status); + + for (const auto &vstream : vstreams) { + LOGGER__INFO("{}", vstream.get_pipeline_description()); + } + + return vstreams; +} + +Expected> VStreamsBuilderUtils::create_output_post_process_nms(OutputStreamPtrVector &output_streams, + hailo_vstream_params_t vstreams_params, + const std::map &output_vstream_infos, + const std::shared_ptr &nms_op) +{ + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + std::vector> elements; + std::vector vstreams; + + hailo_status status = add_nms_post_process(output_streams, vstreams_params, elements, vstreams, + pipeline_status, output_vstream_infos, nms_op); + CHECK_SUCCESS_AS_EXPECTED(status); + + for (const auto &vstream : vstreams) { + LOGGER__INFO("{}", vstream.get_pipeline_description()); + } + + return vstreams; +} + +Expected> VStreamsBuilderUtils::add_hw_read_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, const ElementBuildParams &build_params) +{ + auto hw_read_elem = HwReadElement::create(output_stream, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params); + CHECK_EXPECTED(hw_read_elem); + elements.push_back(hw_read_elem.value()); + return hw_read_elem; +} + +Expected> VStreamsBuilderUtils::add_pull_queue_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, const hailo_vstream_params_t &vstream_params, size_t frame_size) +{ + auto pull_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, frame_size, pipeline_status); + CHECK_EXPECTED(pull_queue_elem); + elements.push_back(pull_queue_elem.value()); + return pull_queue_elem; +} + +Expected> VStreamsBuilderUtils::add_argmax_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, + const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, const ElementBuildParams &build_params) +{ + // Updating metadata according to user request. 
TODO: HRT-9737
+    auto updated_outputs_metadata = argmax_op_metadata.get()->outputs_metadata();
+    updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+    auto metadata = std::dynamic_pointer_cast(argmax_op_metadata);
+    assert(nullptr != metadata);
+    metadata->set_outputs_metadata(updated_outputs_metadata);
+    CHECK_SUCCESS_AS_EXPECTED(metadata->validate_format_info());
+    // Updating metadata according to user request. TODO: HRT-9737 - End
+
+    auto op_expected = net_flow::ArgmaxPostProcessOp::create(metadata);
+    CHECK_EXPECTED(op_expected);
+    auto argmax_op = op_expected.release();
+
+    auto argmax_element = ArgmaxPostProcessElement::create(argmax_op,
+        PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index),
+        build_params);
+    CHECK_EXPECTED(argmax_element);
+    elements.push_back(argmax_element.value());
+    return argmax_element;
+}
+
+Expected> VStreamsBuilderUtils::add_softmax_element(std::shared_ptr &output_stream,
+    std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params,
+    const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, const ElementBuildParams &build_params)
+{
+    // Updating metadata according to user request. TODO: HRT-9737
+    // Currently softmax only supports float32 input with NHWC or NC order
+    auto updated_inputs_metadata = softmax_op_metadata.get()->inputs_metadata();
+    updated_inputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+    auto updated_outputs_metadata = softmax_op_metadata.get()->outputs_metadata();
+    updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+    auto metadata = std::dynamic_pointer_cast(softmax_op_metadata);
+    assert(nullptr != metadata);
+    metadata->set_outputs_metadata(updated_outputs_metadata);
+    metadata->set_inputs_metadata(updated_inputs_metadata);
+    CHECK_SUCCESS_AS_EXPECTED(metadata->validate_format_info());
+    // Updating metadata according to user request.
TODO: HRT-9737 - End + + auto op_expected = net_flow::SoftmaxPostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + auto softmax_op = op_expected.release(); + auto softmax_element = SoftmaxPostProcessElement::create(softmax_op, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout); + CHECK_EXPECTED(softmax_element); + elements.push_back(softmax_element.value()); + return softmax_element; +} + +Expected> VStreamsBuilderUtils::add_nms_to_detections_convert_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const ElementBuildParams &build_params) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto nms_to_detections_element = ConvertNmsToDetectionsElement::create(metadata->nms_info(), + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params); + CHECK_EXPECTED(nms_to_detections_element); + elements.push_back(nms_to_detections_element.value()); + return nms_to_detections_element; +} + +Expected> VStreamsBuilderUtils::add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const ElementBuildParams &build_params) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto remove_overlapping_bboxes_element = RemoveOverlappingBboxesElement::create(metadata->nms_config(), + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params); + CHECK_EXPECTED(remove_overlapping_bboxes_element); + elements.push_back(remove_overlapping_bboxes_element.value()); + return remove_overlapping_bboxes_element; +} + +Expected> VStreamsBuilderUtils::add_fill_nms_format_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const ElementBuildParams &build_params) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_config(), + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params); + CHECK_EXPECTED(fill_nms_format_element); + elements.push_back(fill_nms_format_element.value()); + return fill_nms_format_element; +} + +Expected> VStreamsBuilderUtils::add_user_buffer_queue_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, const hailo_vstream_params_t &vstream_params, size_t frame_size) +{ + auto post_argmax_queue_element = UserBufferQueueElement::create( + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, frame_size, pipeline_status); + CHECK_EXPECTED(post_argmax_queue_element); + elements.push_back(post_argmax_queue_element.value()); + return post_argmax_queue_element; +} + +Expected> VStreamsBuilderUtils::add_post_infer_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, const hailo_vstream_params_t 
&vstream_params) +{ + auto post_infer_element = PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format, + output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_quant_infos(), output_stream->get_info().nms_info, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, pipeline_status); + CHECK_EXPECTED(post_infer_element); + elements.push_back(post_infer_element.value()); + return post_infer_element; +} + +Expected> VStreamsBuilderUtils::create_output_post_process_argmax(std::shared_ptr output_stream, + const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, + const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata) +{ + std::vector> elements; + std::vector vstreams; + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + assert(!vstreams_params_map.empty()); + + // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the + // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. + hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; + size_t buffer_pool_size = 0; + for (const auto &elem_name_params : vstreams_params_map) { + hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; + hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; + buffer_pool_size += elem_name_params.second.queue_size; + } + + // TODO (HRT-4522): Support this measurement + CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, + "Pipeline FPS statistics measurement is not implemented"); + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = hw_read_element_stats_flags; + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE); + build_params.vstream_stats_flags = hw_read_stream_stats_flags; + build_params.shutdown_event = nullptr; + build_params.buffer_pool_size_edges = buffer_pool_size; + + auto hw_read_element = add_hw_read_element(output_stream, elements, "HwReadEl", build_params); + CHECK_EXPECTED(hw_read_element); + + assert(1 == vstreams_params_map.size()); + auto op_input_format = argmax_op_metadata->inputs_metadata().begin()->second.format; + auto vstream_params = vstreams_params_map.begin()->second; + vstream_params.user_buffer_format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format); + + auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_hw_read", + vstream_params, output_stream->get_frame_size()); + CHECK_EXPECTED(hw_read_queue_element); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); + + auto argmax_element = add_argmax_element(output_stream, elements, "ArgmaxPPEl", + vstream_params, argmax_op_metadata, build_params); + CHECK_EXPECTED(argmax_element); + + 
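+    // The argmax chain, once the linking below completes, is:
+    //   HwReadEl -> PullQEl_hw_read -> ArgmaxPPEl -> UserBuffQEl_post_argmax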
CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), argmax_element.value()));
+
+    const auto final_frame_size = HailoRTCommon::get_frame_size(output_vstream_info,
+        vstream_params.user_buffer_format);
+
+    auto post_argmax_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements,
+        "UserBuffQEl_post_argmax", vstream_params, final_frame_size);
+    CHECK_EXPECTED(post_argmax_queue_element);
+
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(argmax_element.value(), post_argmax_queue_element.value()));
+
+    auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params);
+    CHECK_EXPECTED(pipeline_latency_accumulator);
+
+    output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+    hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+    auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, post_argmax_queue_element.release(), std::move(elements),
+        std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release());
+    CHECK_EXPECTED(vstream);
+    vstreams.emplace_back(vstream.release());
+
+    for (const auto &current_vstream : vstreams) {
+        LOGGER__INFO("{}", current_vstream.get_pipeline_description());
+    }
+
+    return vstreams;
+}
+
+hailo_status VStreamsBuilderUtils::handle_pix_buffer_splitter_flow(std::vector> streams,
+    const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements,
+    std::vector &vstreams, const hailo_vstream_params_t &vstream_params,
+    std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event,
+    AccumulatorPtr accumulator)
+{
+    // Sort the streams by their plane index - we rely on this order to know which plane belongs to which stream
+    auto comparator = [](std::shared_ptr a, std::shared_ptr b) {
+        return a->get_layer_info().plane_index < b->get_layer_info().plane_index;
+    };
+    std::sort(streams.begin(), streams.end(), comparator);
+
+    auto duration_collector_expected = DurationCollector::create(vstream_params.pipeline_elements_stats_flags);
+    CHECK_EXPECTED_AS_STATUS(duration_collector_expected);
+
+    auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufferEl",
+        vstream_info.name, 0), std::chrono::milliseconds(HAILO_INFINITE), duration_collector_expected.release(),
+        pipeline_status, vstream_info.format.order);
+    CHECK_EXPECTED_AS_STATUS(planes_splitter);
+    base_elements.push_back(planes_splitter.value());
+
+    uint32_t stream_number = 0;
+
+    for (const auto &stream : streams) {
+        auto hw_write_elem = HwWriteElement::create(stream,
+            PipelineObject::create_element_name("HwWriteEl", stream->name(), stream->get_info().index),
+            vstream_params.pipeline_elements_stats_flags, pipeline_status);
+        CHECK_EXPECTED_AS_STATUS(hw_write_elem);
+        base_elements.insert(base_elements.begin(), hw_write_elem.value());
+
+        auto &stream_info = stream->get_info();
+        auto &src_image_shape = stream_info.shape;
+        auto &dst_image_shape = stream_info.hw_shape;
+        auto &dst_format = stream_info.format;
+        auto src_format = vstream_params.user_buffer_format;
+        /* The format order of each plane (stream) is determined by the stream's order;
+           type and flags are taken from the vstream params */
+        src_format.order = dst_format.order;
+        auto quant_infos = std::vector{stream_info.quant_info};
+
+        auto should_transform_expected = InputTransformContext::is_transformation_required(src_image_shape, src_format,
+            dst_image_shape, dst_format, quant_infos);
+        CHECK_EXPECTED_AS_STATUS(should_transform_expected);
+
+        if (should_transform_expected.value()) {
+            auto pre_infer_elem = PreInferElement::create(src_image_shape, src_format,
+                dst_image_shape, dst_format, quant_infos, PipelineObject::create_element_name("PreInferEl",
+                stream->get_info().name, stream->get_info().index), vstream_params, pipeline_status);
+            CHECK_EXPECTED_AS_STATUS(pre_infer_elem);
+            base_elements.push_back(pre_infer_elem.value());
+
+            auto queue_elem = PushQueueElement::create(
+                PipelineObject::create_element_name("PushQEl", stream_info.name, stream_info.index),
+                vstream_params, stream_info.hw_frame_size, pipeline_status);
+            CHECK_EXPECTED_AS_STATUS(queue_elem);
+            base_elements.push_back(queue_elem.value());
+
+            CHECK_SUCCESS(PipelinePad::link_pads(planes_splitter.value(), pre_infer_elem.value(), stream_number, 0));
+            CHECK_SUCCESS(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value()));
+            CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), *hw_write_elem));
+        } else {
+            CHECK_SUCCESS(PipelinePad::link_pads(planes_splitter.value(), *hw_write_elem, stream_number, 0));
+        }
+        stream_number++;
+    }
+
+    auto vstream = InputVStream::create(vstream_info, { vstream_info.quant_info }, vstream_params, planes_splitter.value(),
+        nullptr, std::move(base_elements), std::move(pipeline_status), core_op_activated_event, accumulator);
+    CHECK_EXPECTED_AS_STATUS(vstream);
+    vstreams.emplace_back(vstream.release());
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status VStreamsBuilderUtils::add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map,
+    std::vector> &&base_elements, std::vector &vstreams,
+    std::shared_ptr last_elem, std::shared_ptr> pipeline_status,
+    const std::map &output_vstream_infos)
+{
+    auto expected_demuxer = OutputDemuxer::create(*output_stream);
+    CHECK_EXPECTED_AS_STATUS(expected_demuxer);
+
+    std::shared_ptr demuxer_ptr = expected_demuxer.release();
+    CHECK(nullptr != demuxer_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    auto status = output_stream->set_timeout(HAILO_INFINITE_TIMEOUT);
+    CHECK_SUCCESS(status);
+
+    // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the
+    // pipeline_elements_stats_flags for the demux_elem as bitwise or of all the flags.
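+    // For example, if one edge's params request FPS measurement and another's request
+    // latency measurement, the merged flags enable both, and buffer_pool_size is the
+    // sum of all the requested queue sizes.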
+ hailo_pipeline_elem_stats_flags_t demux_elem_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + hailo_vstream_stats_flags_t demux_vstream_stats_flags = HAILO_VSTREAM_STATS_NONE; + size_t buffer_pool_size = 0; + for (const auto &elem_name_params : vstreams_params_map) { + demux_elem_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; + demux_vstream_stats_flags |= elem_name_params.second.vstream_stats_flags; + buffer_pool_size += elem_name_params.second.queue_size; + } + + auto pull_queue_elem = PullQueueElement::create("PreDemuxPullQEl", HAILO_INFINITE_TIMEOUT, + buffer_pool_size, output_stream->get_frame_size(), demux_elem_stats_flags, demux_vstream_stats_flags, + pipeline_status); + CHECK_EXPECTED_AS_STATUS(pull_queue_elem); + base_elements.push_back(pull_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(last_elem, pull_queue_elem.value())); + last_elem = pull_queue_elem.release(); + + auto demux_elem = TransformDemuxElement::create(demuxer_ptr, + PipelineObject::create_element_name("TransformDemuxEl", output_stream->name(), output_stream->get_info().index), + std::chrono::milliseconds(HAILO_INFINITE), demux_elem_stats_flags, pipeline_status); + CHECK_EXPECTED_AS_STATUS(demux_elem); + base_elements.push_back(demux_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(last_elem, demux_elem.value())); + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + uint32_t i = 0; + for (auto &edge_info : demuxer_ptr->get_edges_stream_info()) { + auto name_params_pair = vstreams_params_map.find(edge_info.name); + CHECK(name_params_pair != vstreams_params_map.end(), HAILO_NOT_FOUND, + "Failed to find vstreams params of edge {}", edge_info.name); + + const auto vstream_info = output_vstream_infos.find(edge_info.name); + CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, + "Failed to find vstream info of {}", edge_info.name); + + const auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), name_params_pair->second); + + // For each mux vstream, we create a copy of the previous elements + auto current_vstream_elements = base_elements; + + // For muxed VStreams we use the same pipeline_status for all + auto pipeline_status_copy = pipeline_status; + + auto demux_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name("PullQueueEl_demux", edge_info.name, edge_info.index), + vstream_params, edge_info.hw_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(demux_queue_elem); + current_vstream_elements.push_back(demux_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(demux_elem.value(), demux_queue_elem.value(), i, 0)); + + CHECK_SUCCESS(demux_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT)); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); + CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); + auto should_transform = OutputTransformContext::is_transformation_required(edge_info.hw_shape, + edge_info.format, edge_info.shape, vstream_params.user_buffer_format, std::vector{edge_info.quant_info}); // TODO: Get quant vector (HRT-11077) + CHECK_EXPECTED_AS_STATUS(should_transform); + + if (should_transform.value()) { + auto post_infer_elem = PostInferElement::create(edge_info.hw_shape, edge_info.format, + edge_info.shape, vstream_params.user_buffer_format, { edge_info.quant_info }, edge_info.nms_info, // TODO: Get quant vector (HRT-11077) + 
PipelineObject::create_element_name("PostInferEl", edge_info.name, edge_info.index), + vstream_params, pipeline_status); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); + current_vstream_elements.push_back(post_infer_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), post_infer_elem.value())); + + auto post_transform_frame_size = HailoRTCommon::get_frame_size(edge_info.shape, vstream_params.user_buffer_format); + auto post_infer_queue_elem = UserBufferQueueElement::create( + PipelineObject::create_element_name("UserBuffQEl_post_infer", edge_info.name, edge_info.index), + vstream_params, post_transform_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(post_infer_queue_elem); + current_vstream_elements.push_back(post_infer_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); + + // TODO: Replace output_stream->get_quant_infos() with mux quant info + auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, post_infer_queue_elem.release(), std::move(current_vstream_elements), // TODO: Get quant vector (HRT-11077) + std::move(pipeline_status_copy), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED_AS_STATUS(vstream); + vstreams.emplace_back(vstream.release()); + } else { + // TODO: HRT-4179 + auto user_copy_elem = CopyBufferElement::create( + PipelineObject::create_element_name("CopyBufferEl", edge_info.name, edge_info.index), + pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms)); + CHECK_EXPECTED_AS_STATUS(user_copy_elem); + current_vstream_elements.push_back(user_copy_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), user_copy_elem.value())); + + // TODO: Replace output_stream->get_quant_infos() with mux quant info + auto vstream = OutputVStream::create(vstream_info->second, { edge_info.quant_info }, vstream_params, user_copy_elem.release(), std::move(current_vstream_elements), // TODO: Get quant vector (HRT-11077) + std::move(pipeline_status_copy), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED_AS_STATUS(vstream); + vstreams.emplace_back(vstream.release()); + } + i++; + } + return HAILO_SUCCESS; +} + +hailo_status VStreamsBuilderUtils::add_nms_fuse(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, + std::vector> &elements, std::vector &vstreams, + std::shared_ptr> pipeline_status, + const std::map &output_vstream_infos) +{ + std::vector nms_infos; + nms_infos.reserve(output_streams.size()); + for (const auto &out_stream : output_streams) { + CHECK(out_stream->get_info().nms_info.defuse_info.class_group_index <= output_streams.size(), + HAILO_INVALID_ARGUMENT, "Not all defused nms outputs were grouped correctly!"); + nms_infos.emplace_back(out_stream->get_info().nms_info); + } + + // To get the fused layer name and src stream format, we use the stream info of one of the defuses + auto first_defused_stream_info = output_streams[0]->get_info(); + auto fused_layer_name = first_defused_stream_info.nms_info.defuse_info.original_name; + auto src_stream_format = first_defused_stream_info.format; + + auto vstream_info = output_vstream_infos.find(fused_layer_name); + CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, + "Failed to find vstream info of {}. Could be due to use of old HEF. 
Try to re-compile network with newer Dataflow Compiler version", fused_layer_name); + + vstreams_params = expand_vstream_params_autos(first_defused_stream_info, vstreams_params); + auto nms_elem = NmsMuxElement::create(nms_infos, + PipelineObject::create_element_name("NmsMuxEl", fused_layer_name, 0), + vstreams_params, pipeline_status); + CHECK_EXPECTED_AS_STATUS(nms_elem); + auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); + + for (uint32_t i = 0; i < output_streams.size(); ++i) { + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = vstreams_params.pipeline_elements_stats_flags; + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE); + build_params.vstream_stats_flags = vstreams_params.vstream_stats_flags; + build_params.shutdown_event = nullptr; + build_params.buffer_pool_size_edges = vstreams_params.queue_size; + + const auto &curr_stream_info = output_streams[i]->get_info(); + output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT); + + auto hw_read_elem = HwReadElement::create(output_streams[i], + PipelineObject::create_element_name("HwReadEl", curr_stream_info.name, curr_stream_info.index), + build_params); + CHECK_EXPECTED_AS_STATUS(hw_read_elem); + elements.push_back(hw_read_elem.value()); + + auto nms_source_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name("PullQueueEl_nms_source", curr_stream_info.name, curr_stream_info.index), + vstreams_params, curr_stream_info.hw_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); + elements.push_back(nms_source_queue_elem.value()); + nms_source_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); + CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem.value(), nms_source_queue_elem.value())); + CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); + } + elements.push_back(nms_elem.value()); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstreams_params); + CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); + + EventPtr core_op_activated_event = nullptr; + if (!output_streams[0]->is_scheduled()) { + core_op_activated_event = output_streams[0]->get_core_op_activated_event(); + } + + auto pre_transform_frame_size = HailoRTCommon::get_nms_hw_frame_size(fused_layer_nms_info); + + auto nms_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name("PullQEl_post_infer", fused_layer_name, 0), + vstreams_params, pre_transform_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(nms_queue_elem); + nms_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); + elements.push_back(nms_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(nms_elem.value(), nms_queue_elem.value())); + + auto post_infer_elem = PostInferElement::create({}, src_stream_format, + {}, vstreams_params.user_buffer_format, { vstream_info->second.quant_info }, fused_layer_nms_info, // TODO: Get quant vector (HRT-11078) + PipelineObject::create_element_name("PostInferEl", fused_layer_name, 0), vstreams_params, pipeline_status); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); + + elements.push_back(post_infer_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(nms_queue_elem.value(), post_infer_elem.value())); + + auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(fused_layer_nms_info, vstreams_params.user_buffer_format); + auto post_infer_queue_elem = UserBufferQueueElement::create( + 
PipelineObject::create_element_name("UserBufQEl_post_infer", fused_layer_name, 0), + vstreams_params, post_transform_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(post_infer_queue_elem); + elements.push_back(post_infer_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); + + // TODO: Check with SDK where should we take the quant infos from (output_streams[0]->get_quant_infos() might be good) (HRT-11078) + auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, post_infer_queue_elem.release(), std::move(elements), // TODO: Get quant vector (HRT-11078) + std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED_AS_STATUS(vstream); + vstreams.emplace_back(vstream.release()); + + return HAILO_SUCCESS; +} + +hailo_status VStreamsBuilderUtils::add_nms_post_process(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, + std::vector> &elements, std::vector &vstreams, + std::shared_ptr> pipeline_status, const std::map &output_vstream_infos, + const std::shared_ptr &nms_op) +{ + auto first_stream_info = output_streams[0]->get_info(); + auto op_metadata = std::dynamic_pointer_cast(nms_op->metadata()); + assert(nullptr != op_metadata); + vstreams_params.user_buffer_format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type( + vstreams_params.user_buffer_format, nms_op->metadata()->type(), op_metadata->nms_config().bbox_only); + CHECK(vstreams_params.user_buffer_format.type == HAILO_FORMAT_TYPE_FLOAT32, HAILO_INVALID_ARGUMENT, + "NMS output format type must be HAILO_FORMAT_TYPE_FLOAT32"); + + if (!op_metadata->nms_config().bbox_only) { + CHECK(HailoRTCommon::is_nms(vstreams_params.user_buffer_format.order), HAILO_INVALID_ARGUMENT, + "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); + } + + std::unordered_map inputs_metadata; + std::unordered_map outputs_metadata; + for (uint32_t i = 0; i < output_streams.size(); ++i) { + const auto &curr_stream_info = output_streams[i]->get_info(); + net_flow::BufferMetaData input_metadata = { + curr_stream_info.shape, + curr_stream_info.hw_shape, + curr_stream_info.format, + curr_stream_info.quant_info + }; + inputs_metadata.insert({curr_stream_info.name, input_metadata}); + } + + const auto &output_pads = nms_op->outputs_metadata(); + assert(output_pads.size() == 1); + auto vstream_info = output_vstream_infos.find(output_pads.begin()->first); + CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, + "Failed to find vstream info of {}", nms_op->metadata()->get_name()); + net_flow::BufferMetaData output_metadata = { + vstream_info->second.shape, + vstream_info->second.shape, + vstream_info->second.format, + vstream_info->second.quant_info + }; + outputs_metadata.insert({vstream_info->first, output_metadata}); + + auto nms_elem = NmsPostProcessMuxElement::create(nms_op, + PipelineObject::create_element_name("NmsPPMuxEl", nms_op->get_name(), 0), + vstreams_params, pipeline_status); + CHECK_EXPECTED_AS_STATUS(nms_elem); + + hailo_format_t nms_src_format; + nms_src_format.flags = HAILO_FORMAT_FLAGS_NONE; + nms_src_format.order = HAILO_FORMAT_ORDER_NHCW; + nms_src_format.type = first_stream_info.format.type; + + for (uint32_t i = 0; i < output_streams.size(); ++i) { + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = 
vstreams_params.pipeline_elements_stats_flags;
+        build_params.pipeline_status = pipeline_status;
+        build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE);
+        build_params.vstream_stats_flags = vstreams_params.vstream_stats_flags;
+        build_params.shutdown_event = nullptr;
+        build_params.buffer_pool_size_edges = vstreams_params.queue_size;
+
+        const auto &curr_stream_info = output_streams[i]->get_info();
+        output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT);
+
+        auto should_transform = OutputTransformContext::is_transformation_required(curr_stream_info.hw_shape, curr_stream_info.format,
+            curr_stream_info.hw_shape, nms_src_format, output_streams[i]->get_quant_infos());
+        CHECK_EXPECTED_AS_STATUS(should_transform);
+
+        CHECK(!(should_transform.value()), HAILO_INVALID_ARGUMENT, "Unexpected transformation required for {}", curr_stream_info.name);
+
+        auto hw_read_elem = HwReadElement::create(output_streams[i],
+            PipelineObject::create_element_name("HwReadEl", curr_stream_info.name, curr_stream_info.index),
+            build_params);
+        CHECK_EXPECTED_AS_STATUS(hw_read_elem);
+        elements.push_back(hw_read_elem.value());
+
+        auto nms_source_queue_elem = PullQueueElement::create(
+            PipelineObject::create_element_name("PullQEl_nms", curr_stream_info.name, curr_stream_info.index),
+            vstreams_params, curr_stream_info.hw_frame_size, pipeline_status);
+        CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem);
+        nms_source_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT);
+        elements.push_back(nms_source_queue_elem.value());
+        CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem.value(), nms_source_queue_elem.value()));
+        CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i));
+        nms_elem.value()->add_sink_name(curr_stream_info.name);
+    }
+    elements.push_back(nms_elem.value());
+
+    uint32_t post_transform_frame_size;
+    if (op_metadata->nms_config().bbox_only) {
+        post_transform_frame_size = HailoRTCommon::get_frame_size(vstream_info->second.shape, vstream_info->second.format);
+    } else {
+        post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(vstream_info->second.nms_shape, vstreams_params.user_buffer_format);
+    }
+    auto user_buffer_elem = UserBufferQueueElement::create(
+        PipelineObject::create_element_name("UserBufQEl_post_infer", vstream_info->first, 0),
+        vstreams_params, post_transform_frame_size, pipeline_status);
+    CHECK_EXPECTED_AS_STATUS(user_buffer_elem);
+    elements.push_back(user_buffer_elem.value());
+    CHECK_SUCCESS(PipelinePad::link_pads(nms_elem.value(), user_buffer_elem.value()));
+
+    auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstreams_params);
+    CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator);
+
+    EventPtr core_op_activated_event = nullptr;
+    if (!output_streams[0]->is_scheduled()) {
+        core_op_activated_event = output_streams[0]->get_core_op_activated_event();
+    }
+
+    // If the user uses HailoRT++, we can assume they won't use Output Scale by Feature
+    auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, nms_elem.release(), std::move(elements),
+        std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release());
+    CHECK_EXPECTED_AS_STATUS(vstream);
+    vstreams.emplace_back(vstream.release());
+
+    return HAILO_SUCCESS;
+}
+
+Expected VStreamsBuilderUtils::create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params)
+{
+    AccumulatorPtr pipeline_latency_accumulator = nullptr;
+    const auto measure_latency = ((vstreams_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_LATENCY) != 0);
+    if (measure_latency) {
+        pipeline_latency_accumulator = make_shared_nothrow>("latency");
+        CHECK_AS_EXPECTED(nullptr != pipeline_latency_accumulator, HAILO_OUT_OF_HOST_MEMORY);
+    }
+
+    return pipeline_latency_accumulator;
+}
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp
new file mode 100644
index 00000000..b269003a
--- /dev/null
+++ b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp
@@ -0,0 +1,127 @@
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file vstream_builder.hpp
+ * @brief Vstream Builder
+ **/
+
+#ifndef _HAILO_VSTREAM_BUILDER_HPP_
+#define _HAILO_VSTREAM_BUILDER_HPP_
+
+#include "net_flow/pipeline/vstream_internal.hpp"
+
+namespace hailort
+{
+
+class VStreamsBuilderUtils
+{
+public:
+    static Expected> create_inputs(std::vector> input_streams, const hailo_vstream_info_t &input_vstream_infos,
+        const hailo_vstream_params_t &vstreams_params);
+    static Expected> create_outputs(std::shared_ptr output_stream,
+        NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos);
+    static InputVStream create_input(std::shared_ptr input_vstream);
+    static OutputVStream create_output(std::shared_ptr output_vstream);
+    static Expected> create_output_nms(OutputStreamPtrVector &output_streams,
+        hailo_vstream_params_t vstreams_params,
+        const std::map &output_vstream_infos);
+    static Expected> create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams,
+        OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params,
+        const std::unordered_map &post_process_ops,
+        const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map);
+    static Expected> create_output_post_process_nms(OutputStreamPtrVector &output_streams,
+        hailo_vstream_params_t vstreams_params,
+        const std::map &output_vstream_infos,
+        const std::shared_ptr &nms_op);
+    static Expected> add_hw_read_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, const ElementBuildParams &build_params);
+
+    static Expected> add_pull_queue_element(std::shared_ptr &output_stream,
+        std::shared_ptr> &pipeline_status, std::vector> &elements,
+        const std::string &element_name, const hailo_vstream_params_t &vstream_params, size_t frame_size);
+
+    // Move all post-process related elements to a dedicated module - HRT-11512
+    static Expected> add_argmax_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params,
+        const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, const ElementBuildParams &build_params);
+
+    static Expected> add_softmax_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params,
+        const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, const ElementBuildParams &build_params);
+
+    static Expected> add_nms_to_detections_convert_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata,
+        const ElementBuildParams &build_params);
+
+    static Expected>
add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata,
+        const ElementBuildParams &build_params);
+
+    static Expected> add_fill_nms_format_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata,
+        const ElementBuildParams &build_params);
+
+    static Expected> add_user_buffer_queue_element(std::shared_ptr &output_stream,
+        std::shared_ptr> &pipeline_status, std::vector> &elements,
+        const std::string &element_name, const hailo_vstream_params_t &vstream_params, size_t frame_size);
+
+    static Expected> add_post_infer_element(std::shared_ptr &output_stream,
+        std::shared_ptr> &pipeline_status, std::vector> &elements,
+        const std::string &element_name, const hailo_vstream_params_t &vstream_params);
+
+    static hailo_status add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map,
+        std::vector> &&elements, std::vector &vstreams,
+        std::shared_ptr last_elem, std::shared_ptr> pipeline_status,
+        const std::map &output_vstream_infos);
+
+    static hailo_status handle_pix_buffer_splitter_flow(std::vector> streams,
+        const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements,
+        std::vector &vstreams, const hailo_vstream_params_t &vstream_params,
+        std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event,
+        AccumulatorPtr accumulator);
+
+    static hailo_status add_nms_fuse(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params,
+        std::vector> &elements, std::vector &vstreams,
+        std::shared_ptr> pipeline_status,
+        const std::map &output_vstream_infos);
+
+    static hailo_status add_nms_post_process(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params,
+        std::vector> &elements, std::vector &vstreams,
+        std::shared_ptr> pipeline_status,
+        const std::map &output_vstream_infos,
+        const std::shared_ptr &nms_op);
+
+    static Expected create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params);
+
+    static hailo_format_t expand_user_buffer_format_autos_multi_planar(const hailo_vstream_info_t &vstream_info,
+        const hailo_format_t &user_buffer_format)
+    {
+        /* In the multi-planar case we compare to vstream_info instead of stream_info,
+           as the ll-stream formats don't indicate the format of the vstreams */
+        auto expanded_user_buffer_format = user_buffer_format;
+        if (HAILO_FORMAT_TYPE_AUTO == expanded_user_buffer_format.type) {
+            expanded_user_buffer_format.type = vstream_info.format.type;
+        }
+        if (HAILO_FORMAT_ORDER_AUTO == expanded_user_buffer_format.order) {
+            expanded_user_buffer_format.order = vstream_info.format.order;
+        }
+
+        return expanded_user_buffer_format;
+    }
+
+private:
+    static Expected> create_output_post_process_argmax(std::shared_ptr output_stream,
+        const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info,
+        const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata);
+    static Expected> create_output_post_process_softmax(std::shared_ptr output_stream,
+        const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info,
+        const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata);
+    static Expected> create_output_post_process_iou(std::shared_ptr output_stream,
+        hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata);
+};
+
+} /*
diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp index a78932b7..779ced76 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp @@ -31,8 +31,13 @@ #include "stream_common/stream_internal.hpp" -#include "hef/hef_internal.hpp" +#include "common/barrier.hpp" + #include "net_flow/pipeline/pipeline.hpp" +#include "net_flow/pipeline/filter_elements.hpp" +#include "net_flow/pipeline/queue_elements.hpp" +#include "net_flow/pipeline/edge_elements.hpp" +#include "net_flow/pipeline/multi_io_elements.hpp" #include "net_flow/ops/yolov5_post_process.hpp" #include "network_group/network_group_internal.hpp" @@ -79,7 +84,7 @@ class BaseVStream protected: BaseVStream(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, hailo_status &output_status); BaseVStream() = default; @@ -94,7 +99,6 @@ class BaseVStream volatile bool m_is_activated; volatile bool m_is_aborted; std::shared_ptr> m_pipeline_status; - EventPtr m_shutdown_event; EventPtr m_core_op_activated_event; std::map m_fps_accumulators; std::map m_latency_accumulators; @@ -109,7 +113,7 @@ class InputVStreamInternal : public BaseVStream static Expected> create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); InputVStreamInternal(InputVStreamInternal &&other) noexcept = default; InputVStreamInternal &operator=(InputVStreamInternal &&other) noexcept = default; @@ -125,7 +129,7 @@ class InputVStreamInternal : public BaseVStream protected: InputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, hailo_status &output_status); InputVStreamInternal() = default; }; @@ -137,7 +141,7 @@ class OutputVStreamInternal : public BaseVStream static Expected> create( const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); OutputVStreamInternal(OutputVStreamInternal &&other) noexcept = default; OutputVStreamInternal &operator=(OutputVStreamInternal &&other) noexcept = default; @@ -150,11 +154,12 @@ class OutputVStreamInternal : public BaseVStream virtual 
hailo_status set_nms_score_threshold(float32_t threshold) = 0; virtual hailo_status set_nms_iou_threshold(float32_t threshold) = 0; virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) = 0; + virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) = 0; protected: OutputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status); OutputVStreamInternal() = default; }; @@ -165,7 +170,7 @@ class InputVStreamImpl : public InputVStreamInternal static Expected> create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); InputVStreamImpl(InputVStreamImpl &&) noexcept = default; InputVStreamImpl(const InputVStreamImpl &) = delete; @@ -181,7 +186,7 @@ class InputVStreamImpl : public InputVStreamInternal private: InputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status); bool m_is_multi_planar; @@ -193,7 +198,7 @@ class OutputVStreamImpl : public OutputVStreamInternal static Expected> create( const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); OutputVStreamImpl(OutputVStreamImpl &&) noexcept = default; OutputVStreamImpl(const OutputVStreamImpl &) = delete; @@ -206,11 +211,12 @@ class OutputVStreamImpl : public OutputVStreamInternal virtual hailo_status set_nms_score_threshold(float32_t threshold) override; virtual hailo_status set_nms_iou_threshold(float32_t threshold) override; virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override; + virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) override; private: OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status); }; @@ -294,6 +300,7 @@ class OutputVStreamClient : public OutputVStreamInternal 
virtual hailo_status set_nms_score_threshold(float32_t threshold) override; virtual hailo_status set_nms_iou_threshold(float32_t threshold) override; virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override; + virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) override; private: OutputVStreamClient(std::unique_ptr client, const VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format, @@ -308,640 +315,6 @@ class OutputVStreamClient : public OutputVStreamInternal }; #endif // HAILO_SUPPORT_MULTI_PROCESS -class PreInferElement : public FilterElement -{ -public: - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const std::string &name, - const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, - std::shared_ptr async_pipeline = nullptr); - PreInferElement(std::unique_ptr &&transform_context, BufferPoolPtr buffer_pool, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline); - virtual ~PreInferElement() = default; - - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - std::unique_ptr m_transform_context; -}; - -class RemoveOverlappingBboxesElement : public FilterElement -{ -public: - static Expected> create( - const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const net_flow::NmsPostProcessConfig nms_config, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection 
pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~RemoveOverlappingBboxesElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - - virtual hailo_status set_nms_iou_threshold(float32_t threshold) - { - m_nms_config.nms_iou_th = threshold; - return HAILO_SUCCESS; - } - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - net_flow::NmsPostProcessConfig m_nms_config; -}; - -class PostInferElement : public FilterElement -{ -public: - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_info, const hailo_nms_info_t &nms_info, - const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, EventPtr shutdown_event, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - PostInferElement(std::unique_ptr &&transform_context, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~PostInferElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - std::unique_ptr m_transform_context; -}; - -class ConvertNmsToDetectionsElement : public FilterElement -{ -public: 
- static Expected> create(const hailo_nms_info_t &nms_info, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create( - const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~ConvertNmsToDetectionsElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - hailo_nms_info_t m_nms_info; -}; - -class FillNmsFormatElement : public FilterElement -{ -public: - static Expected> create(const hailo_nms_info_t nms_info, - const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_nms_info_t nms_info, - const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~FillNmsFormatElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - - virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override - { - m_nms_config.max_proposals_per_class = max_proposals_per_class; - return HAILO_SUCCESS; - } - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - net_flow::NmsPostProcessConfig m_nms_config; -}; - -class ArgmaxPostProcessElement : public FilterElement -{ -public: - static Expected> create(std::shared_ptr argmax_op, - const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, - std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, 
- hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - static Expected> create(std::shared_ptr argmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline); - virtual ~ArgmaxPostProcessElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - std::shared_ptr m_argmax_op; -}; - -class SoftmaxPostProcessElement : public FilterElement -{ -public: - static Expected> create(std::shared_ptr softmax_op, - const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, - std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(std::shared_ptr softmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline); - virtual ~SoftmaxPostProcessElement() = default; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - std::shared_ptr m_softmax_op; -}; - -class NmsPostProcessMuxElement : public BaseMuxElement -{ -public: - static Expected> create(std::shared_ptr nms_op, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, - hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(std::shared_ptr nms_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = 
nullptr); - static Expected> create(std::shared_ptr nms_op, - const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - NmsPostProcessMuxElement(std::shared_ptr nms_op, BufferPoolPtr &&pool, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline); - - virtual std::vector get_queue_size_accumulators() override; - void add_sink_name(const std::string &name) // TODO: remove this (HRT-8875) - { - m_sinks_names.push_back(name); - } - - std::shared_ptr get_op() { return m_nms_op; } - - virtual hailo_status set_nms_score_threshold(float32_t threshold) - { - auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); - assert(nullptr != nms_metadata); - nms_metadata->nms_config().nms_score_th = threshold; - - return HAILO_SUCCESS; - } - - virtual hailo_status set_nms_iou_threshold(float32_t threshold) - { - auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); - assert(nullptr != nms_metadata); - nms_metadata->nms_config().nms_iou_th = threshold; - - return HAILO_SUCCESS; - } - - virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) - { - auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); - assert(nullptr != nms_metadata); - nms_metadata->nms_config().max_proposals_per_class = max_proposals_per_class; - - return HAILO_SUCCESS; - } - -protected: - virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; - -private: - std::shared_ptr m_nms_op; - std::vector m_sinks_names; // TODO: remove this (HRT-8875) -}; - -class NmsMuxElement : public BaseMuxElement -{ -public: - static Expected> create(const std::vector &nms_infos, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - static Expected> create(const std::vector &nms_infos, const std::string &name, - const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const std::vector &nms_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, BufferPoolPtr &&pool, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - const hailo_nms_info_t &get_fused_nms_info() const; - - virtual std::vector get_queue_size_accumulators() override; - -protected: - virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; - -private: - 
std::vector m_nms_infos; - hailo_nms_info_t m_fused_nms_info; -}; - -class TransformDemuxElement : public BaseDemuxElement -{ -public: - static Expected> create(std::shared_ptr demuxer, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(std::shared_ptr demuxer, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - TransformDemuxElement(std::shared_ptr demuxer, std::vector &&pools, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual std::vector get_queue_size_accumulators() override; - -protected: - virtual Expected> action(PipelineBuffer &&input) override; - -private: - std::shared_ptr m_demuxer; -}; - -class PixBufferElement : public BaseDemuxElement -{ -public: - static Expected> create(const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> pipeline_status, hailo_format_order_t order, - std::shared_ptr async_pipeline = nullptr); - - PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, hailo_format_order_t order, - std::shared_ptr async_pipeline); - - virtual Expected can_push_buffer_upstream(const std::string &pad_name) override; - -protected: - virtual Expected> action(PipelineBuffer &&input); - hailo_format_order_t m_order; -}; - - -class HwReadElement : public SourceElement -{ -public: - static Expected> create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, - size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL); - HwReadElement(std::shared_ptr stream, BufferPoolPtr buffer_pool, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, EventPtr shutdown_event, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction); - virtual ~HwReadElement() = default; - - virtual std::vector get_queue_size_accumulators() override; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual hailo_status execute_activate() override; - virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_clear() override; - virtual hailo_status execute_flush() override; - virtual hailo_status execute_abort() override; - virtual hailo_status execute_clear_abort() override; - virtual hailo_status execute_wait_for_finish() override; - uint32_t get_invalid_frames_count(); - 
virtual std::string description() const override; - -private: - std::shared_ptr m_stream; - BufferPoolPtr m_pool; - std::chrono::milliseconds m_timeout; - EventPtr m_shutdown_event; - WaitOrShutdown m_activation_wait_or_shutdown; -}; - -class HwWriteElement : public SinkElement -{ -public: - static Expected> create(std::shared_ptr stream, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH); - HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction); - virtual ~HwWriteElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual hailo_status execute_activate() override; - virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_clear() override; - virtual hailo_status execute_flush() override; - virtual hailo_status execute_abort() override; - virtual hailo_status execute_clear_abort() override; - virtual hailo_status execute_wait_for_finish() override; - virtual std::string description() const override; - -private: - std::shared_ptr m_stream; - EventPtr m_got_flush_event; -}; - -class LastAsyncElement : public SinkElement -{ -public: - static Expected> create(const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction = PipelineDirection::PUSH); - static Expected> create(const std::string &name, const ElementBuildParams &build_params, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction = PipelineDirection::PUSH); - LastAsyncElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~LastAsyncElement() = default; - - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual std::string description() const override; - virtual hailo_status execute_activate() override; - virtual hailo_status execute_wait_for_finish() override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - virtual hailo_status execute_post_deactivate(bool /*should_clear_abort*/) override { return HAILO_SUCCESS; }; - virtual hailo_status execute_deactivate() override { return HAILO_SUCCESS; }; -}; - -// 
Note: This element does infer - it sends writes to HW and reads the outputs -class AsyncHwElement : public PipelineElementInternal -{ -public: - static Expected> create(const std::unordered_map &named_stream_infos, - std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, const std::string &name, - std::shared_ptr> pipeline_status, - std::shared_ptr net_group, PipelineDirection pipeline_direction = PipelineDirection::PUSH, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, - std::vector &&output_streams_pools, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline, std::shared_ptr net_group, - const size_t max_ongoing_transfers); - virtual ~AsyncHwElement() = default; - - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - Expected get_source_index_from_output_stream_name(const std::string &output_stream_name); - Expected get_sink_index_from_input_stream_name(const std::string &input_stream_name); - virtual Expected get_source_index_from_source_name(const std::string &source_name) override; - -protected: - virtual std::vector execution_pads() override; - virtual hailo_status execute_terminate(hailo_status error_status) override; - -private: - void handle_error_in_hw_async_elem(hailo_status error_status); - bool has_all_sinks_arrived(); - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - - std::chrono::milliseconds m_timeout; - std::vector m_pools; - std::shared_ptr m_net_group; - size_t m_max_ongoing_transfers; - - std::unordered_map m_sink_name_to_stream_name; - std::unordered_map m_source_name_to_stream_name; - std::unordered_map m_sink_has_arrived; - std::unordered_map m_input_buffers; - std::mutex m_mutex; - std::condition_variable m_cv; - std::unordered_map m_source_name_to_index; - std::unordered_map m_sink_name_to_index; -}; - -class CopyBufferElement : public FilterElement -{ -public: - static Expected> create(const std::string &name, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); - CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~CopyBufferElement() = default; - virtual PipelinePad 
&next_pad() override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; -}; - -class VStreamsBuilderUtils -{ -public: - static Expected> create_inputs(std::vector> input_streams, const hailo_vstream_info_t &input_vstream_infos, - const hailo_vstream_params_t &vstreams_params); - static Expected> create_outputs(std::shared_ptr output_stream, - NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos); - static InputVStream create_input(std::shared_ptr input_vstream); - static OutputVStream create_output(std::shared_ptr output_vstream); - static Expected> create_output_nms(OutputStreamPtrVector &output_streams, - hailo_vstream_params_t vstreams_params, - const std::map &output_vstream_infos); - static Expected> create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams, - OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params, - const std::unordered_map &post_process_ops, - const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map); - static Expected> create_output_post_process_nms(OutputStreamPtrVector &output_streams, - hailo_vstream_params_t vstreams_params, - const std::map &output_vstream_infos, - const std::shared_ptr &nms_op); - static Expected> add_hw_read_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size, - const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags); - - static Expected> add_pull_queue_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params); - - // Move all post-processes related elements to a dedicated model - HRT-11512 - static Expected> add_argmax_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &argmax_op, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, - EventPtr &shutdown_event); - - static Expected> add_softmax_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &softmax_op, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, - EventPtr &shutdown_event); - - static Expected> add_nms_to_detections_convert_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, - hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, - const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - - static Expected> add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, - size_t buffer_pool_size, 
std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - - static Expected> add_fill_nms_format_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - - static Expected> add_user_buffer_queue_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params); - - static Expected> add_post_infer_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event); - - static hailo_status add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, - std::vector> &&elements, std::vector &vstreams, - std::shared_ptr hw_read_elem, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos); - - static hailo_status handle_pix_buffer_splitter_flow(std::vector> streams, - const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements, - std::vector &vstreams, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event, - AccumulatorPtr accumaltor); - - static hailo_status add_nms_fuse(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, - std::vector> &elements, std::vector &vstreams, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos); - - static hailo_status add_nms_post_process(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, - std::vector> &elements, std::vector &vstreams, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos, - const std::shared_ptr &nms_op); - - static Expected create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params); - - static hailo_format_t expand_user_buffer_format_autos_multi_planar(const hailo_vstream_info_t &vstream_info, - const hailo_format_t &user_buffer_format) - { - /* In multi planar case we compare to vstream_info instead of stream_info, - as the ll-streams formats doesnt indicate the format of the vstreams */ - auto expanded_user_buffer_format = user_buffer_format; - if (HAILO_FORMAT_TYPE_AUTO == expanded_user_buffer_format.type) { - expanded_user_buffer_format.type = vstream_info.format.type; - } - if (HAILO_FORMAT_ORDER_AUTO == expanded_user_buffer_format.order) { - expanded_user_buffer_format.order = vstream_info.format.order; - } - - return expanded_user_buffer_format; - } - -private: - static Expected> create_output_post_process_argmax(std::shared_ptr output_stream, - const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, - const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata); - static Expected> create_output_post_process_softmax(std::shared_ptr output_stream, - const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, - const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata); - 
static Expected> create_output_post_process_iou(std::shared_ptr output_stream, - hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata); -}; - } /* namespace hailort */ #endif /* _HAILO_VSTREAM_INTERNAL_HPP_ */ diff --git a/hailort/libhailort/src/network_group/network_group.cpp b/hailort/libhailort/src/network_group/network_group.cpp index dfc59160..569ead8d 100644 --- a/hailort/libhailort/src/network_group/network_group.cpp +++ b/hailort/libhailort/src/network_group/network_group.cpp @@ -17,14 +17,15 @@ #include "common/os_utils.hpp" #include "network_group/network_group_internal.hpp" -#include "hef/hef_internal.hpp" #include "eth/eth_stream.hpp" #include "vdma/vdma_stream.hpp" #include "mipi/mipi_stream.hpp" #include "device_common/control.hpp" -#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/vstream_builder.hpp" +#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp" #include "core_op/resource_manager/resource_manager.hpp" - +#include "utils/buffer_storage.hpp" +#include "hef/hef_internal.hpp" namespace hailort { @@ -37,7 +38,7 @@ class ActivatedNetworkGroupImpl : public ActivatedNetworkGroup { auto status = HAILO_UNINITIALIZED; std::unique_ptr ang = make_unique_nothrow(cng, status); CHECK_NOT_NULL_AS_EXPECTED(ang, HAILO_OUT_OF_HOST_MEMORY); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__ERROR("Network group activation failed because some of the low level streams are aborted. Make sure to run clear_abort before activating!"); return make_unexpected(status); } @@ -84,7 +85,7 @@ class ActivatedNetworkGroupImpl : public ActivatedNetworkGroup { m_cng(cng) { auto activate_status = m_cng.activate_impl(); - if (HAILO_STREAM_ABORTED_BY_USER == activate_status) { + if (HAILO_STREAM_ABORT == activate_status) { LOGGER__INFO("Network group activation failed because it was aborted by user"); status = activate_status; return; @@ -159,18 +160,13 @@ Expected> ConfiguredNetworkGroup::activat return activate(HailoRTDefaults::get_active_network_group_params()); } -hailo_status ConfiguredNetworkGroup::wait_for_callbacks_finish() -{ - return wait_for_callbacks_to_maintain_below_threshold(1); -} - -hailo_status ConfiguredNetworkGroup::wait_for_callbacks_to_maintain_below_threshold(const size_t threshold) +hailo_status ConfiguredNetworkGroup::wait_for_ongoing_callbacks_count_under(const size_t threshold) { std::unique_lock lock(m_infer_requests_mutex); bool done = m_cv.wait_for(lock, DEFAULT_TRANSFER_TIMEOUT, [&, threshold](){ return (m_ongoing_transfers.load() < threshold); }); - CHECK(done, HAILO_TIMEOUT, "Got timeout in `wait_for_callbacks_to_maintain_below_threshold`"); + CHECK(done, HAILO_TIMEOUT); return HAILO_SUCCESS; } @@ -303,7 +299,7 @@ Expected> ConfiguredNetworkGroupBase::get_layer_info( return res; } -Expected> ConfiguredNetworkGroupBase::get_nms_meta_data(const std::string &edge_name) +Expected ConfiguredNetworkGroupBase::get_op_meta_data(const std::string &edge_name) { auto expected_ops_metadata = get_ops_metadata(); CHECK_EXPECTED(expected_ops_metadata); @@ -319,9 +315,19 @@ Expected> ConfiguredNetworkGroupBase::g return false; }); CHECK_AS_EXPECTED(matching_metadata != ops_metadata.end(), HAILO_INVALID_ARGUMENT, - "There is no NMS post-process for '{}'", edge_name); + "There is no post-process metadata for '{}'", edge_name); + auto metadata = (*matching_metadata); + return metadata; +} + +Expected> ConfiguredNetworkGroupBase::get_nms_meta_data(const std::string 
&edge_name) +{ + auto matching_metadata = get_op_meta_data(edge_name); + CHECK_EXPECTED(matching_metadata); + auto nms_metadata = std::dynamic_pointer_cast(*matching_metadata); - CHECK_NOT_NULL_AS_EXPECTED(nms_metadata, HAILO_INVALID_ARGUMENT); + CHECK((nms_metadata != nullptr), HAILO_INVALID_ARGUMENT, + "Failed to get nms metadata for `{}`. Op's metadata is not nms metadata", edge_name); return nms_metadata; } @@ -349,6 +355,19 @@ hailo_status ConfiguredNetworkGroupBase::set_nms_max_bboxes_per_class(const std: return HAILO_SUCCESS; } +hailo_status ConfiguredNetworkGroupBase::set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) +{ + auto expected_op_metadata = get_op_meta_data(edge_name); + CHECK_EXPECTED_AS_STATUS(expected_op_metadata); + + auto nms_metadata = std::dynamic_pointer_cast(expected_op_metadata.value()); + CHECK((nms_metadata != nullptr), HAILO_INVALID_ARGUMENT, + "Failed to `set_nms_max_accumulated_mask_size` for `{}`. Op's metadata is not YOLOv5-Seg metadata", edge_name); + + nms_metadata->yolov5seg_config().max_accumulated_mask_size = max_accumulated_mask_size; + return HAILO_SUCCESS; +} + ConfiguredNetworkGroupBase::ConfiguredNetworkGroupBase( const ConfigureNetworkParams &config_params, std::vector> &&core_ops, NetworkGroupMetadata &&metadata) : @@ -512,7 +531,7 @@ hailo_status ConfiguredNetworkGroupBase::deactivate_impl() hailo_status ConfiguredNetworkGroupBase::shutdown() { - std::unique_lock lock(m_shutdown_mutex); + std::unique_lock lock(m_mutex); if (!m_is_shutdown) { m_is_shutdown = true; return get_core_op()->shutdown(); @@ -773,9 +792,6 @@ Expected ConfiguredNetworkGroupBase::get_min_buffer_pool_size() } } - // TODO (HRT-11294): In some cases, buffer_pool_size is lower then batch_size. we should remove this line. 
- buffer_pool_size = std::max(buffer_pool_size, static_cast(get_smallest_configured_batch_size(get_config_params()))); - return buffer_pool_size; } @@ -783,31 +799,14 @@ hailo_status ConfiguredNetworkGroupBase::infer_async(const NamedBuffersCallbacks const std::function &infer_request_done_cb) { InferRequest infer_request{}; - const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); for (auto &named_buffer_callback : named_buffers_callbacks) { const auto &name = named_buffer_callback.first; const auto &buffer = named_buffer_callback.second.first; const auto &callback = named_buffer_callback.second.second; - TransferRequest trans_req{}; - trans_req.callback = callback; - BufferPtr buffer_ptr = nullptr; - // TODO (HRT-12239): Avoid this section - if (reinterpret_cast(buffer.data()) % dma_able_alignment == 0) { - auto hailo_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast(buffer.data()), - buffer.size()); - CHECK_EXPECTED_AS_STATUS(hailo_buffer); - buffer_ptr = hailo_buffer.release(); - } else { - auto hailo_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast(buffer.data()), - buffer.size()); - CHECK_EXPECTED_AS_STATUS(hailo_buffer); - buffer_ptr = hailo_buffer.release(); - } - trans_req.transfer_buffers.emplace_back(buffer_ptr); - infer_request.transfers.emplace(name, trans_req); + infer_request.transfers.emplace(name, TransferRequest{buffer, callback}); } infer_request.callback = [this, infer_request_done_cb](hailo_status status){ - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { LOGGER__INFO("Infer request was aborted by user"); } else if (status != HAILO_SUCCESS) { @@ -818,7 +817,8 @@ hailo_status ConfiguredNetworkGroupBase::infer_async(const NamedBuffersCallbacks decrease_ongoing_callbacks(); }; - increase_ongoing_callbacks(); + increase_ongoing_callbacks(); // Increase before launch, as the callback may be invoked before we get the chance to increase the counter + std::unique_lock lock(m_mutex); auto status = get_core_op()->infer_async(std::move(infer_request)); if (status != HAILO_SUCCESS) { // If we got an error in `infer_async()`, then the callbacks will not be called.
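The refactored `infer_async` relies on a small bookkeeping pattern: an in-flight-callback counter that is increased before the request is launched (the completion callback may fire first) and decreased from the callback, plus a condition variable that `wait_for_ongoing_callbacks_count_under` uses to block until the count drops below a threshold. A minimal, self-contained sketch of that pattern, using only the standard library (`OngoingCallbackTracker` is a hypothetical stand-in for the counter and cv embedded in ConfiguredNetworkGroup, and `timeout` plays the role of `DEFAULT_TRANSFER_TIMEOUT`):

#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <mutex>

class OngoingCallbackTracker {
public:
    // Call before launching the async operation: the completion callback can
    // run on another thread before the launch call returns, so the count must
    // already be up.
    void increase()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_ongoing++;
    }

    // Call from the completion callback; wakes any waiter.
    void decrease()
    {
        {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_ongoing--;
        }
        m_cv.notify_all();
    }

    // Mirrors wait_for_ongoing_callbacks_count_under(): blocks until fewer
    // than `threshold` callbacks are in flight, or the timeout expires.
    bool wait_until_under(size_t threshold, std::chrono::milliseconds timeout)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        return m_cv.wait_for(lock, timeout, [&] { return m_ongoing < threshold; });
    }

private:
    size_t m_ongoing = 0;
    std::mutex m_mutex;
    std::condition_variable m_cv;
};

Increasing before the launch is exactly what the comment in `infer_async` guards against: if the device completes the transfer immediately, `decrease()` runs on another thread before the launching call returns, and the counter must never dip below the number of requests actually in flight.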
diff --git a/hailort/libhailort/src/network_group/network_group_internal.hpp b/hailort/libhailort/src/network_group/network_group_internal.hpp index 98b8f987..87b4a635 100644 --- a/hailort/libhailort/src/network_group/network_group_internal.hpp +++ b/hailort/libhailort/src/network_group/network_group_internal.hpp @@ -32,10 +32,11 @@ #include "common/latency_meter.hpp" -#include "hef/hef_internal.hpp" #include "core_op/active_core_op_holder.hpp" #include "core_op/core_op.hpp" +#include "net_flow/ops_metadata/nms_op_metadata.hpp" + #ifdef HAILO_SUPPORT_MULTI_PROCESS #include "service/hailort_rpc_client.hpp" #include "rpc/rpc_definitions.hpp" @@ -198,10 +199,12 @@ class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup virtual Expected> get_layer_info(const std::string &stream_name) override; virtual Expected> get_ops_metadata() override; + Expected get_op_meta_data(const std::string &edge_name); virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) override; virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override; + virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) override; Expected> get_nms_meta_data(const std::string &edge_name); private: @@ -221,10 +224,10 @@ class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup bool m_is_shutdown = false; bool m_is_forked; - std::mutex m_shutdown_mutex; + std::mutex m_mutex; friend class VDeviceCoreOp; - friend class PipelineBuilder; + friend class AsyncPipelineBuilder; }; // Move client ng to different header @@ -322,12 +325,15 @@ class ConfiguredNetworkGroupClient : public ConfiguredNetworkGroup const std::function &infer_request_done_cb) override; hailo_status execute_callback(const ProtoCallbackIdentifier &cb_id); + void execute_callbacks_on_error(hailo_status error_status); + virtual Expected> get_layer_info(const std::string &stream_name) override; virtual Expected> get_ops_metadata() override; virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) override; virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override; + virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) override; private: ConfiguredNetworkGroupClient(NetworkGroupIdentifier &&identifier, const std::string &network_group_name); diff --git a/hailort/libhailort/src/os/driver_scan.hpp b/hailort/libhailort/src/os/driver_scan.hpp deleted file mode 100644 index c8620aa5..00000000 --- a/hailort/libhailort/src/os/driver_scan.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file driver_scan.hpp - * @brief Get list and parse pcie driver info - **/ - -#include "os/hailort_driver.hpp" - -namespace hailort -{ - -Expected> list_devices(); -#ifndef __QNX__ -Expected query_device_info(const std::string &device_name); -#else // __QNX__ -Expected query_device_info(const std::string &device_name, uint32_t index); -#endif // __QNX__ - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/microsec_timer.hpp b/hailort/libhailort/src/os/microsec_timer.hpp index 87f70bcd..7d4ce71d 100644 --- a/hailort/libhailort/src/os/microsec_timer.hpp +++ b/hailort/libhailort/src/os/microsec_timer.hpp @@ -32,6 +32,11 @@ class MicrosecTimer final * @note This function is guaranteed to sleep for at least the desired time, though it may sleep for more. */ static void sleep(uint64_t microsecs); + + static void sleep(std::chrono::microseconds microsecs) + { + sleep(microsecs.count()); + } }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/CMakeLists.txt b/hailort/libhailort/src/os/posix/CMakeLists.txt index 2aa2e8ae..b8fd6eea 100644 --- a/hailort/libhailort/src/os/posix/CMakeLists.txt +++ b/hailort/libhailort/src/os/posix/CMakeLists.txt @@ -10,7 +10,6 @@ set(files ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hailort_driver.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/linux/CMakeLists.txt b/hailort/libhailort/src/os/posix/linux/CMakeLists.txt index cffd810c..779505b3 100644 --- a/hailort/libhailort/src/os/posix/linux/CMakeLists.txt +++ b/hailort/libhailort/src/os/posix/linux/CMakeLists.txt @@ -1,8 +1,8 @@ cmake_minimum_required(VERSION 3.0.0) set(files - ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dma_buffer_utils.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp b/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp new file mode 100644 index 00000000..0f9a7166 --- /dev/null +++ b/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp @@ -0,0 +1,79 @@ +/** + * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file dma_buffer_utils.cpp + * @brief A module for managing DMA buffers on Linux + **/ +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <linux/dma-buf.h> + + +#include "hailo/hailort.h" +#include "hailo/event.hpp" +#include "common/utils.hpp" +#include "utils/dma_buffer_utils.hpp" + +namespace hailort +{ + +Expected DmaBufferUtils::mmap_dma_buffer_write(hailo_dma_buffer_t dma_buffer) +{ + void* dma_buf_ptr = mmap(NULL, dma_buffer.size, PROT_WRITE, MAP_SHARED, dma_buffer.fd, 0); + CHECK_AS_EXPECTED(MAP_FAILED != dma_buf_ptr, HAILO_INTERNAL_FAILURE, "Failed to run mmap on DMA buffer for writing"); + + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE, + }; + auto err = ioctl(dma_buffer.fd, DMA_BUF_IOCTL_SYNC, &sync); + CHECK_AS_EXPECTED(0 == err, HAILO_INTERNAL_FAILURE, "Failed to run DMA_BUF_IOCTL_SYNC ioctl, errno {}", err); + + return MemoryView(dma_buf_ptr, dma_buffer.size); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_write(hailo_dma_buffer_t dma_buffer, MemoryView dma_buffer_memview) +{ + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE, + }; + + auto err = ioctl(dma_buffer.fd, DMA_BUF_IOCTL_SYNC, &sync); + CHECK(0 == err, HAILO_INTERNAL_FAILURE, "Failed to run DMA_BUF_IOCTL_SYNC ioctl, errno {}", err); + + err = munmap(dma_buffer_memview.data(), dma_buffer.size); + CHECK(0 == err, HAILO_INTERNAL_FAILURE, "Failed to munmap dma buffer, errno {}", err); + + return HAILO_SUCCESS; +} + +Expected DmaBufferUtils::mmap_dma_buffer_read(hailo_dma_buffer_t dma_buffer) +{ + void* dma_buf_ptr = mmap(NULL, dma_buffer.size, PROT_READ, MAP_SHARED, dma_buffer.fd, 0); + CHECK_AS_EXPECTED(MAP_FAILED != dma_buf_ptr, HAILO_INTERNAL_FAILURE, "Failed to run mmap on DMA buffer for reading"); + + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ, + }; + auto err = ioctl(dma_buffer.fd, DMA_BUF_IOCTL_SYNC, &sync); + CHECK_AS_EXPECTED(0 == err, HAILO_INTERNAL_FAILURE, "Failed to run DMA_BUF_IOCTL_SYNC ioctl, errno {}", err); + + return MemoryView(dma_buf_ptr, dma_buffer.size); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_read(hailo_dma_buffer_t dma_buffer, MemoryView dma_buffer_memview) +{ + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ, + }; + auto err = ioctl(dma_buffer.fd, DMA_BUF_IOCTL_SYNC, &sync); + CHECK(0 == err, HAILO_INTERNAL_FAILURE, "Failed to run DMA_BUF_IOCTL_SYNC ioctl, errno {}", err); + + err = munmap(dma_buffer_memview.data(), dma_buffer.size); + CHECK(0 == err, HAILO_INTERNAL_FAILURE, "Failed to unmap dma buffer, errno {}", err); + + return HAILO_SUCCESS; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/linux/event.cpp b/hailort/libhailort/src/os/posix/linux/event.cpp index 3e1e9477..b12d0d32 100644 --- a/hailort/libhailort/src/os/posix/linux/event.cpp +++ b/hailort/libhailort/src/os/posix/linux/event.cpp @@ -180,14 +180,15 @@ Expected Semaphore::create(uint32_t initial_count) return Semaphore(handle); } -SemaphorePtr Semaphore::create_shared(uint32_t initial_count) +Expected Semaphore::create_shared(uint32_t initial_count) { const auto handle = open_semaphore_handle(initial_count); - if (-1 == handle) { - return nullptr; - } + CHECK_AS_EXPECTED(-1 != handle, HAILO_EVENT_CREATE_FAIL); - return make_shared_nothrow(handle); + auto res = make_shared_nothrow(handle); + CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY); + + return res; } hailo_status Semaphore::signal()
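The Linux `DmaBufferUtils` above bracket every CPU access to a dma-buf with paired `DMA_BUF_IOCTL_SYNC` begin/end calls, so CPU caches stay coherent with the device. A short usage sketch under stated assumptions (`fill_input_buffer` is a hypothetical helper, not part of the patch; `dma_buffer` is assumed to already hold a valid fd and size obtained elsewhere):

#include <cstddef>
#include <cstring>

#include "hailo/hailort.h"
#include "utils/dma_buffer_utils.hpp"

using namespace hailort;

// Copies host data into a dma-buf. DmaBufferUtils issues the
// DMA_BUF_SYNC_START / DMA_BUF_SYNC_END ioctls around the CPU access.
static hailo_status fill_input_buffer(hailo_dma_buffer_t dma_buffer, const void *data, size_t size)
{
    if (size > dma_buffer.size) {
        return HAILO_INVALID_ARGUMENT;
    }

    // Maps with PROT_WRITE and opens a write-sync window.
    auto mapped = DmaBufferUtils::mmap_dma_buffer_write(dma_buffer);
    if (!mapped.has_value()) {
        return mapped.status();
    }

    std::memcpy(mapped.value().data(), data, size);

    // Closes the write-sync window and unmaps; the device may now read the buffer.
    return DmaBufferUtils::munmap_dma_buffer_write(dma_buffer, mapped.value());
}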
--git a/hailort/libhailort/src/os/posix/mmap_buffer.cpp b/hailort/libhailort/src/os/posix/mmap_buffer.cpp index 09391182..6144f05e 100644 --- a/hailort/libhailort/src/os/posix/mmap_buffer.cpp +++ b/hailort/libhailort/src/os/posix/mmap_buffer.cpp @@ -8,7 +8,7 @@ **/ #include "os/mmap_buffer.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "hailo_ioctl_common.h" #include diff --git a/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt b/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt index cffd810c..779505b3 100644 --- a/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt +++ b/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt @@ -1,8 +1,8 @@ cmake_minimum_required(VERSION 3.0.0) set(files - ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dma_buffer_utils.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp b/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp new file mode 100644 index 00000000..4937a7f4 --- /dev/null +++ b/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp @@ -0,0 +1,38 @@ +/** + * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file dma_buffer_utils.cpp + * @brief A module for managing DMA buffers on QNX (not supported) + **/ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "utils/dma_buffer_utils.hpp" + + +namespace hailort +{ + +Expected<MemoryView> DmaBufferUtils::mmap_dma_buffer_write(hailo_dma_buffer_t /*dma_buffer*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_write(hailo_dma_buffer_t /*dma_buffer*/, MemoryView /*dma_buffer_memview*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +Expected<MemoryView> DmaBufferUtils::mmap_dma_buffer_read(hailo_dma_buffer_t /*dma_buffer*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_read(hailo_dma_buffer_t /*dma_buffer*/, MemoryView /*dma_buffer_memview*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/qnx/event.cpp b/hailort/libhailort/src/os/posix/qnx/event.cpp index 312b6991..855cdf2e 100644 --- a/hailort/libhailort/src/os/posix/qnx/event.cpp +++ b/hailort/libhailort/src/os/posix/qnx/event.cpp @@ -120,14 +120,15 @@ Expected<Semaphore> Semaphore::create(uint32_t initial_count) return std::move(Semaphore(handle, initial_count)); } -SemaphorePtr Semaphore::create_shared(uint32_t initial_count) +Expected<SemaphorePtr> Semaphore::create_shared(uint32_t initial_count) { const auto handle = open_semaphore_handle(initial_count); - if (INVALID_EVENT_HANDLE == handle) { - return nullptr; - } + CHECK(INVALID_EVENT_HANDLE != handle, HAILO_EVENT_CREATE_FAIL); - return make_shared_nothrow<Semaphore>(handle, initial_count); + auto res = make_shared_nothrow<Semaphore>(handle, initial_count); + CHECK_NOT_NULL(res, HAILO_OUT_OF_HOST_MEMORY); + + return res; } hailo_status Semaphore::signal() diff --git a/hailort/libhailort/src/os/windows/CMakeLists.txt b/hailort/libhailort/src/os/windows/CMakeLists.txt index bba4bed1..c7ad1a20 100644 --- a/hailort/libhailort/src/os/windows/CMakeLists.txt +++ b/hailort/libhailort/src/os/windows/CMakeLists.txt @@ -4,9 +4,8 @@ set(files ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hailort_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dma_buffer_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_alloc_guard.cpp ) diff --git a/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp b/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp new file mode 100644 index 00000000..b8404bd4 --- /dev/null +++ b/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp @@ -0,0 +1,36 @@ +/** + * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file dma_buffer_utils.cpp + * @brief A module for managing DMA buffers on Windows (not supported) + **/ + +#include "utils/dma_buffer_utils.hpp" + + +namespace hailort +{ + +Expected<MemoryView> DmaBufferUtils::mmap_dma_buffer_write(hailo_dma_buffer_t /*dma_buffer*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_write(hailo_dma_buffer_t /*dma_buffer*/, MemoryView /*dma_buffer_memview*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +Expected<MemoryView> DmaBufferUtils::mmap_dma_buffer_read(hailo_dma_buffer_t /*dma_buffer*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_read(hailo_dma_buffer_t /*dma_buffer*/, MemoryView /*dma_buffer_memview*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/windows/event.cpp b/hailort/libhailort/src/os/windows/event.cpp index 3512631d..9cf851f6 100644 --- a/hailort/libhailort/src/os/windows/event.cpp +++ b/hailort/libhailort/src/os/windows/event.cpp @@ -129,14 +129,15 @@ Expected<Semaphore> Semaphore::create(uint32_t initial_count) return std::move(Semaphore(handle)); } -SemaphorePtr Semaphore::create_shared(uint32_t initial_count) +Expected<SemaphorePtr> Semaphore::create_shared(uint32_t initial_count) { const auto handle = open_semaphore_handle(initial_count); - if (nullptr == handle) { - return nullptr; - } + CHECK_AS_EXPECTED(nullptr != handle, HAILO_EVENT_CREATE_FAIL); - return make_shared_nothrow<Semaphore>(handle); + auto res = make_shared_nothrow<Semaphore>(handle); + CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY); + + return res; } hailo_status Semaphore::signal()
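Since Semaphore::create_shared now returns Expected<SemaphorePtr> on Linux, QNX and Windows alike, callers can distinguish a failed OS handle from an allocation failure instead of folding both into a null pointer. A hypothetical caller migration, assuming the CHECK_EXPECTED_AS_STATUS convention used elsewhere in the codebase:

    hailo_status init_shared_semaphore(SemaphorePtr &out_semaphore)
    {
        // Before the change, a null result conflated "handle creation failed" with
        // "out of memory"; now each failure propagates as its own status
        // (HAILO_EVENT_CREATE_FAIL vs. HAILO_OUT_OF_HOST_MEMORY).
        auto semaphore = Semaphore::create_shared(0);
        CHECK_EXPECTED_AS_STATUS(semaphore);
        out_semaphore = semaphore.release();
        return HAILO_SUCCESS;
    }

diff --git a/hailort/libhailort/src/os/windows/hailort_driver.cpp b/hailort/libhailort/src/os/windows/hailort_driver.cpp deleted file mode 100644 index f614f17e..00000000 --- a/hailort/libhailort/src/os/windows/hailort_driver.cpp +++ /dev/null @@ -1,947 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.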
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file hailort_driver.cpp - * @brief Low level interface to PCI driver - **/ - -#include "os/windows/osdep.hpp" -#include "os/hailort_driver.hpp" -#include "os/driver_scan.hpp" -#include "common/logger_macros.hpp" -#include "common/utils.hpp" -#include "common/os/windows/string_conversion.hpp" -#include "os/mmap_buffer.hpp" -#include "../../../../drivers/win/include/Public.h" - -#pragma comment(lib, "cfgmgr32.lib") - -namespace hailort -{ - -static_assert(VDMA_CHANNELS_PER_ENGINE == MAX_VDMA_CHANNELS_PER_ENGINE, "Driver and libhailort parameters mismatch"); -static_assert(MAX_VDMA_ENGINES == MAX_VDMA_ENGINES_COUNT, "Driver and libhailort parameters mismatch"); -static_assert(MIN_D2H_CHANNEL_INDEX == VDMA_DEST_CHANNELS_START, "Driver and libhailort parameters mismatch"); - -//TODO HRT-7309: merge with posix -static hailo_dma_data_direction direction_to_dma_data_direction(HailoRTDriver::DmaDirection direction) { - switch (direction){ - case HailoRTDriver::DmaDirection::H2D: - return HAILO_DMA_TO_DEVICE; - case HailoRTDriver::DmaDirection::D2H: - return HAILO_DMA_FROM_DEVICE; - case HailoRTDriver::DmaDirection::BOTH: - return HAILO_DMA_BIDIRECTIONAL; - } - - assert(false); - // On release build Return value that will make ioctls to fail. - return HAILO_DMA_NONE; -} - -static enum hailo_cpu_id translate_cpu_id(hailo_cpu_id_t cpu_id) -{ - switch (cpu_id) - { - case HAILO_CPU_ID_0: - return HAILO_CPU_ID_CPU0; - case HAILO_CPU_ID_1: - return HAILO_CPU_ID_CPU1; - case HAILO_CPU_ID_MAX_ENUM: - // Add label for HAILO_CPU_ID_MAX_ENUM to cover all enum cases (avoid warnings). Continue to the assert. - break; - } - - assert(false); - // On release build Return value that will make ioctls to fail. 
- return HAILO_CPU_ID_NONE; -} - -static hailo_transfer_memory_type translate_memory_type(HailoRTDriver::MemoryType memory_type) -{ - using MemoryType = HailoRTDriver::MemoryType; - switch (memory_type) { - case MemoryType::DIRECT_MEMORY: - return HAILO_TRANSFER_DEVICE_DIRECT_MEMORY; - case MemoryType::VDMA0: - return HAILO_TRANSFER_MEMORY_VDMA0; - case MemoryType::VDMA1: - return HAILO_TRANSFER_MEMORY_VDMA1; - case MemoryType::VDMA2: - return HAILO_TRANSFER_MEMORY_VDMA2; - case MemoryType::PCIE_BAR0: - return HAILO_TRANSFER_MEMORY_PCIE_BAR0; - case MemoryType::PCIE_BAR2: - return HAILO_TRANSFER_MEMORY_PCIE_BAR2; - case MemoryType::PCIE_BAR4: - return HAILO_TRANSFER_MEMORY_PCIE_BAR4; - case MemoryType::DMA_ENGINE0: - return HAILO_TRANSFER_MEMORY_DMA_ENGINE0; - case MemoryType::DMA_ENGINE1: - return HAILO_TRANSFER_MEMORY_DMA_ENGINE1; - case MemoryType::DMA_ENGINE2: - return HAILO_TRANSFER_MEMORY_DMA_ENGINE2; - } - - assert(false); - return HAILO_TRANSFER_MEMORY_MAX_ENUM; -} - -class CWaitable -{ -public: - ULONG Wait(ULONG millies = INFINITE) - { - return WaitForSingleObject(m_Handle, millies); - } - ~CWaitable() - { - if (m_Handle) { - CloseHandle(m_Handle); - } - } -protected: - CWaitable(HANDLE h) : m_Handle(h) { } - HANDLE m_Handle; -}; - -class CMutex : public CWaitable -{ -public: - CMutex() : CWaitable(CreateMutex(NULL, false, NULL)) { } - void Release() - { - ReleaseMutex(m_Handle); - } -}; - -class CEvent : public CWaitable -{ -public: - CEvent(bool Manual) : CWaitable(CreateEvent(NULL, Manual, false, NULL)) { } -}; - -class COverlapped : public CEvent -{ -public: - COverlapped() : CEvent(true) - { - RtlZeroMemory(&m_Overlapped, sizeof(m_Overlapped)); - m_Overlapped.hEvent = m_Handle; - } - operator LPOVERLAPPED() { return &m_Overlapped; } -protected: - OVERLAPPED m_Overlapped; -}; - -template -class CSync -{ -public: - CSync(t& obj) : m_Obj(obj) { m_Obj.Wait(); } - ~CSync() { m_Obj.Release(); } -private: - t& m_Obj; -}; -using CMutexSync = CSync; - -class CDeviceFile -{ -public: - - CDeviceFile(const std::string& path) - { - Create(path.c_str(), true); - } - void Close() - { - CMutexSync sync(m_Mutex); - if (m_Handle) { - CloseHandle(m_Handle); - m_Handle = NULL; - } - } - ~CDeviceFile() - { - Unregister(); - Close(); - } - bool Present() const - { - return m_Handle; - } - HANDLE Detach() { - CMutexSync sync(m_Mutex); - HANDLE h = m_Handle; - m_Handle = NULL; - return h; - } -protected: - bool Notify() - { - if (m_Handle) { - LOGGER__ERROR("Closing the file {}", m_InterfaceName); - } - Close(); - return true; - } - void Create(LPCSTR Name, bool Writable) - { - ULONG access = GENERIC_READ, share = FILE_SHARE_READ; - if (Writable) { - access |= GENERIC_WRITE; - } - else { - share |= FILE_SHARE_WRITE; - } - m_Handle = CreateFileA( - Name, - access, - share, - NULL, - OPEN_EXISTING, - FILE_FLAG_OVERLAPPED, - NULL); - if (m_Handle == INVALID_HANDLE_VALUE) { - m_Handle = NULL; - LOGGER__ERROR("can't open '{}'", Name); - return; - } - - if (!m_SetNotify) { - return; - } - - CM_NOTIFY_FILTER filter; - filter.cbSize = sizeof(filter); - filter.Flags = 0; - filter.FilterType = CM_NOTIFY_FILTER_TYPE_DEVICEHANDLE; - filter.u.DeviceHandle.hTarget = m_Handle; - Unregister(); - CM_Register_Notification(&filter, this, []( - _In_ HCMNOTIFICATION, - _In_opt_ PVOID Context, - _In_ CM_NOTIFY_ACTION Action, - _In_reads_bytes_(EventDataSize) PCM_NOTIFY_EVENT_DATA, - _In_ DWORD) -> DWORD - { - CDeviceFile* f = (CDeviceFile*)Context; - if (Action == CM_NOTIFY_ACTION_DEVICEQUERYREMOVE) { - return 
f->Notify() ? ERROR_SUCCESS : ERROR_CANCELLED; - } - if (Action == CM_NOTIFY_ACTION_DEVICEREMOVECOMPLETE) { - f->Notify(); - } - return ERROR_SUCCESS; - }, - &m_Notification); - } - void Unregister() - { - if (m_Notification) { - CM_Unregister_Notification(m_Notification); - m_Notification = NULL; - } - } -private: - std::string m_InterfaceName; - HCMNOTIFICATION m_Notification = NULL; - CMutex m_Mutex; - bool m_SetNotify = false; - HANDLE m_Handle = NULL; -}; - -// TODO: HRT-7309 : implement hailo_ioctl for windows -static int ioctl(HANDLE h, ULONG val, tCompatibleHailoIoctlData *ioctl_data) -{ - ioctl_data->Parameters.u.value = val; - ULONG returned; - COverlapped overlapped; - bool res = DeviceIoControl(h, HAILO_IOCTL_COMPATIBLE, ioctl_data, sizeof(*ioctl_data), - ioctl_data, sizeof(*ioctl_data), &returned, overlapped); - if (!res) { - ULONG lastError = GetLastError(); - if (lastError != ERROR_IO_PENDING) { - errno = (int)lastError; - return -1; - } - if (!GetOverlappedResult(h, overlapped, &returned, true)) { - errno = (int)GetLastError(); - return -1; - } - } - return 0; -} - -// TODO: validate wraparounds for buffer/mapping handles in the driver (HRT-9509) -const uintptr_t HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; -const size_t HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; -const uint8_t HailoRTDriver::INVALID_VDMA_CHANNEL_INDEX = INVALID_VDMA_CHANNEL; - -static hailo_status validate_driver_version(const hailo_driver_info &driver_info) -{ - hailo_version_t library_version{}; - auto status = hailo_get_library_version(&library_version); - CHECK_SUCCESS(status); - CHECK((driver_info.major_version == library_version.major) && - (driver_info.minor_version == library_version.minor) && - (driver_info.revision_version == library_version.revision), HAILO_INVALID_DRIVER_VERSION, - "Driver version ({}.{}.{}) is different from library version ({}.{}.{})", - driver_info.major_version, driver_info.minor_version, driver_info.revision_version, - library_version.major, library_version.minor, library_version.revision); - return HAILO_SUCCESS; -} - -HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status) : - m_fd(std::move(fd)), - m_device_info(device_info), - m_allocate_driver_buffer(false) -{ - tCompatibleHailoIoctlData data = {}; - hailo_driver_info& driver_info = data.Buffer.DriverInfo; - if (0 > ioctl(m_fd, HAILO_QUERY_DRIVER_INFO, &data)) { - LOGGER__ERROR("Failed to query driver info, errno {}", errno); - status = HAILO_DRIVER_FAIL; - return; - } - status = validate_driver_version(driver_info); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Driver version mismatch, status {}", status); - return; - } - - hailo_device_properties& device_properties = data.Buffer.DeviceProperties; - if (0 > ioctl(m_fd, HAILO_QUERY_DEVICE_PROPERTIES, &data)) { - LOGGER__ERROR("Failed query pcie device properties, errno {}", errno); - status = HAILO_DRIVER_FAIL; - return; - } - - m_desc_max_page_size = device_properties.desc_max_page_size; - m_dma_engines_count = device_properties.dma_engines_count; - - switch (device_properties.dma_type) { - case HAILO_DMA_TYPE_PCIE: - m_dma_type = DmaType::PCIE; - break; - case HAILO_DMA_TYPE_DRAM: - m_dma_type = DmaType::DRAM; - break; - default: - LOGGER__ERROR("Invalid dma type returned from ioctl {}", device_properties.dma_type); - status = HAILO_DRIVER_FAIL; - return; - } - - m_is_fw_loaded = device_properties.is_fw_loaded; - status = HAILO_SUCCESS; 
-} - -Expected> HailoRTDriver::scan_devices() -{ - auto device_names = list_devices(); - CHECK_EXPECTED(device_names, "Failed listing pcie devices"); - - std::vector devices_info; - for (const auto &device_name : device_names.value()) { - auto device_info = query_device_info(device_name); - CHECK_EXPECTED(device_info, "failed parsing device info for {}", device_name); - devices_info.push_back(device_info.release()); - } - return devices_info; -} - -Expected> HailoRTDriver::create(const DeviceInfo &device_info) -{ - CDeviceFile f(device_info.dev_path); - if (!f.Present()) { - LOGGER__ERROR("Failed to open board {}", device_info.dev_path); - return make_unexpected(HAILO_OPEN_FILE_FAILURE); - } - FileDescriptor fd(f.Detach()); - - hailo_status status = HAILO_UNINITIALIZED; - std::unique_ptr driver(new (std::nothrow) HailoRTDriver(device_info, std::move(fd), status)); - CHECK_NOT_NULL_AS_EXPECTED(driver, HAILO_OUT_OF_HOST_MEMORY); - CHECK_SUCCESS_AS_EXPECTED(status); - - return driver; -} - -Expected> HailoRTDriver::read_notification() -{ - tCompatibleHailoIoctlData data; - hailo_d2h_notification& notification_buffer = data.Buffer.D2HNotification; - - auto rc = ioctl(this->m_fd, HAILO_READ_NOTIFICATION, &data); - if (0 > rc) { - return make_unexpected(HAILO_DRIVER_FAIL); - } - - std::vector notification(notification_buffer.buffer_len); - memcpy(notification.data(), notification_buffer.buffer, notification_buffer.buffer_len); - return notification; -} - -hailo_status HailoRTDriver::disable_notifications() -{ - tCompatibleHailoIoctlData data = {}; - - int res = ioctl(m_fd, HAILO_DISABLE_NOTIFICATION, &data); - CHECK(0 <= res, HAILO_DRIVER_FAIL, "HAILO_DISABLE_NOTIFICATION failed with errno: {}", errno); - - return HAILO_SUCCESS; -} -hailo_status HailoRTDriver::read_memory(MemoryType memory_type, uint64_t address, void *buf, size_t size) -{ - if (size == 0) { - LOGGER__ERROR("Invalid size to read"); - return HAILO_INVALID_ARGUMENT; - } - - if (buf == nullptr) { - LOGGER__ERROR("Read buffer pointer is NULL"); - return HAILO_INVALID_ARGUMENT; - } - - constexpr uint32_t CHUNK_SIZE = ARRAY_ENTRIES(hailo_memory_transfer_params::buffer); - uint32_t offset = 0; - - while (offset < size) { - const uint32_t actual_size = std::min(CHUNK_SIZE, static_cast(size) - offset); - auto status = read_memory_ioctl(memory_type, address + offset, - reinterpret_cast(buf) + offset, actual_size); - CHECK_SUCCESS(status); - offset += actual_size; - } - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::write_memory(MemoryType memory_type, uint64_t address, const void *buf, size_t size) -{ - if (size == 0) { - LOGGER__ERROR("Invalid size to read"); - return HAILO_INVALID_ARGUMENT; - } - - if (buf == nullptr) { - LOGGER__ERROR("Read buffer pointer is NULL"); - return HAILO_INVALID_ARGUMENT; - } - - constexpr uint32_t CHUNK_SIZE = ARRAY_ENTRIES(hailo_memory_transfer_params::buffer); - uint32_t offset = 0; - - while (offset < size) { - const uint32_t actual_size = std::min(CHUNK_SIZE, static_cast(size) - offset); - auto status = write_memory_ioctl(memory_type, address + offset, - reinterpret_cast(buf) + offset, actual_size); - CHECK_SUCCESS(status); - offset += actual_size; - } - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size) -{ - if (size == 0) { - LOGGER__ERROR("Invalid size to read"); - return HAILO_INVALID_ARGUMENT; - } - - if (buf == nullptr) { - LOGGER__ERROR("Read buffer pointer is NULL"); - return HAILO_INVALID_ARGUMENT; 
- } - - if (m_dma_type == DmaType::PCIE) { - CHECK(address < std::numeric_limits::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address); - } - - tCompatibleHailoIoctlData data = {}; - hailo_memory_transfer_params& transfer = data.Buffer.MemoryTransfer; - transfer.transfer_direction = TRANSFER_READ; - transfer.memory_type = translate_memory_type(memory_type); - transfer.address = address; - transfer.count = size; - memset(transfer.buffer, 0, sizeof(transfer.buffer)); - - CHECK(size <= sizeof(transfer.buffer), HAILO_INVALID_ARGUMENT, - "Invalid size to read, size given {} is larger than max size {}", size, sizeof(transfer.buffer)); - - if (0 > ioctl(m_fd, HAILO_MEMORY_TRANSFER, &data)) { - LOGGER__ERROR("HailoRTDriver::read_memory failed with errno:{}", errno); - return HAILO_DRIVER_FAIL; - } - - memcpy(buf, transfer.buffer, transfer.count); - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size) -{ - if (size == 0) { - LOGGER__ERROR("Invalid size to write"); - return HAILO_INVALID_ARGUMENT; - } - - if (buf == nullptr) { - LOGGER__ERROR("Write buffer pointer is NULL"); - return HAILO_INVALID_ARGUMENT; - } - - if (m_dma_type == DmaType::PCIE) { - CHECK(address < std::numeric_limits::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address); - } - - tCompatibleHailoIoctlData data = {}; - hailo_memory_transfer_params& transfer = data.Buffer.MemoryTransfer; - transfer.transfer_direction = TRANSFER_WRITE; - transfer.memory_type = translate_memory_type(memory_type); - transfer.address = address; - transfer.count = size; - memset(transfer.buffer, 0, sizeof(transfer.buffer)); - - CHECK(size <= sizeof(transfer.buffer), HAILO_INVALID_ARGUMENT, - "Invalid size to write, size given {} is larger than max size {}", size, sizeof(transfer.buffer)); - - memcpy(transfer.buffer, buf, transfer.count); - - if (0 > ioctl(this->m_fd, HAILO_MEMORY_TRANSFER, &data)) { - LOGGER__ERROR("HailoRTDriver::write_memory failed with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -Expected HailoRTDriver::read_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, - size_t offset, size_t reg_size) -{ - CHECK_AS_EXPECTED(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); - CHECK_AS_EXPECTED(data_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Invalid direction given"); - - tCompatibleHailoIoctlData data = {}; - auto& params = data.Buffer.ChannelRegisterRead; - params.engine_index = channel_id.engine_index; - params.channel_index = channel_id.channel_index; - params.direction = direction_to_dma_data_direction(data_direction); - params.offset = offset; - params.reg_size = reg_size; - params.data = 0; - - if (0 > ioctl(this->m_fd, HAILO_VDMA_CHANNEL_READ_REGISTER, &data)) { - LOGGER__ERROR("HailoRTDriver::read_vdma_channel_register failed with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return std::move(params.data); -} - -hailo_status HailoRTDriver::write_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, - size_t offset, size_t reg_size, uint32_t value) -{ - CHECK(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); - CHECK(data_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Invalid direction given"); - - tCompatibleHailoIoctlData data = {}; - auto& params = 
data.Buffer.ChannelRegisterWrite; - params.engine_index = channel_id.engine_index; - params.channel_index = channel_id.channel_index; - params.direction = direction_to_dma_data_direction(data_direction); - params.offset = offset; - params.reg_size = reg_size; - params.data = value; - - if (0 > ioctl(this->m_fd, HAILO_VDMA_CHANNEL_WRITE_REGISTER, &data)) { - LOGGER__ERROR("HailoRTDriver::write_vdma_channel_register failed with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction, - size_t offset, size_t count) -{ - tCompatibleHailoIoctlData data = {}; - hailo_vdma_buffer_sync_params& sync_info = data.Buffer.VdmaBufferSync; - sync_info.handle = handle; - sync_info.sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE; - sync_info.offset = offset; - sync_info.count = count; - if (0 > ioctl(this->m_fd, HAILO_VDMA_BUFFER_SYNC, &data)) { - LOGGER__ERROR("HAILO_VDMA_BUFFER_SYNC failed with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - return HAILO_SUCCESS; -} - - -hailo_status HailoRTDriver::vdma_interrupts_enable(const ChannelsBitmap &channels_bitmap, bool enable_timestamps_measure) -{ - CHECK(is_valid_channels_bitmap(channels_bitmap), HAILO_INVALID_ARGUMENT, "Invalid channel bitmap given"); - tCompatibleHailoIoctlData data = {}; - hailo_vdma_interrupts_enable_params& params = data.Buffer.VdmaInterruptsEnable; - std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine); - params.enable_timestamps_measure = enable_timestamps_measure; - - CHECK(ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_ENABLE, &data) >= 0, HAILO_DRIVER_FAIL, - "Failed to enable vdma interrupts with errno:{}", errno); - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::vdma_interrupts_disable(const ChannelsBitmap &channels_bitmap) -{ - CHECK(is_valid_channels_bitmap(channels_bitmap), HAILO_INVALID_ARGUMENT, "Invalid channel bitmap given"); - tCompatibleHailoIoctlData data = {}; - hailo_vdma_interrupts_disable_params& params = data.Buffer.VdmaInterruptsDisable; - std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine); - - - if (0 > ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_DISABLE, &data)) { - LOGGER__ERROR("Failed to disable vdma interrupts with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -// TODO: HRT-7309 - unite with posix -static Expected create_interrupt_timestamp_list( - hailo_vdma_interrupts_read_timestamp_params &inter_data) -{ - CHECK_AS_EXPECTED(inter_data.timestamps_count <= MAX_IRQ_TIMESTAMPS_SIZE, HAILO_DRIVER_FAIL, - "Invalid channel interrupts timestamps count returned {}", inter_data.timestamps_count); - ChannelInterruptTimestampList timestamp_list{}; - - timestamp_list.count = inter_data.timestamps_count; - for (size_t i = 0; i < timestamp_list.count; i++) { - timestamp_list.timestamp_list[i].timestamp = std::chrono::nanoseconds(inter_data.timestamps[i].timestamp_ns); - timestamp_list.timestamp_list[i].desc_num_processed = inter_data.timestamps[i].desc_num_processed; - } - return timestamp_list; -} - -static Expected to_irq_data(const hailo_vdma_interrupts_wait_params& params, - uint8_t engines_count) -{ - static_assert(ARRAY_ENTRIES(IrqData::channels_irq_data) == ARRAY_ENTRIES(params.irq_data), "Mismatch irq data size"); - CHECK_AS_EXPECTED(params.channels_count <= ARRAY_ENTRIES(params.irq_data), 
HAILO_DRIVER_FAIL, - "Invalid channels count returned from vdma_interrupts_wait"); - - IrqData irq{}; - irq.channels_count = params.channels_count; - for (uint8_t i = 0; i < params.channels_count; i++) { - const auto engine_index = params.irq_data[i].engine_index; - const auto channel_index = params.irq_data[i].channel_index; - CHECK_AS_EXPECTED(engine_index < engines_count, HAILO_DRIVER_FAIL, - "Invalid engine index {} returned from vdma_interrupts_wait, max {}", engine_index, engines_count); - CHECK_AS_EXPECTED(channel_index < MAX_VDMA_CHANNELS_PER_ENGINE, HAILO_DRIVER_FAIL, - "Invalid channel_index index {} returned from vdma_interrupts_wait", channel_index); - - irq.channels_irq_data[i].channel_id.engine_index = engine_index; - irq.channels_irq_data[i].channel_id.channel_index = channel_index; - irq.channels_irq_data[i].is_active = params.irq_data[i].is_active; - irq.channels_irq_data[i].desc_num_processed = params.irq_data[i].host_num_processed; - irq.channels_irq_data[i].host_error = params.irq_data[i].host_error; - irq.channels_irq_data[i].device_error = params.irq_data[i].device_error; - } - return irq; -} - -Expected HailoRTDriver::vdma_interrupts_wait(const ChannelsBitmap &channels_bitmap) -{ - CHECK_AS_EXPECTED(is_valid_channels_bitmap(channels_bitmap), HAILO_INVALID_ARGUMENT, "Invalid channel bitmap given"); - tCompatibleHailoIoctlData data = {}; - hailo_vdma_interrupts_wait_params& params = data.Buffer.VdmaInterruptsWait; - std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine); - - if (0 > ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_WAIT, &data)) { - LOGGER__ERROR("Failed to wait interrupts for channels bitmap with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return to_irq_data(params, static_cast(m_dma_engines_count)); -} - -Expected HailoRTDriver::vdma_interrupts_read_timestamps(vdma::ChannelId channel_id) -{ - tCompatibleHailoIoctlData data = {}; - hailo_vdma_interrupts_read_timestamp_params ¶ms = data.Buffer.VdmaInterruptsReadTimestamps; - params.engine_index = channel_id.engine_index; - params.channel_index = channel_id.channel_index; - - if (0 > ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS, &data)) { - LOGGER__ERROR("Failed to read channel interrupts timestamps errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return create_interrupt_timestamp_list(params); -} - -hailo_status HailoRTDriver::fw_control(const void *request, size_t request_len, const uint8_t request_md5[PCIE_EXPECTED_MD5_LENGTH], - void *response, size_t *response_len, uint8_t response_md5[PCIE_EXPECTED_MD5_LENGTH], - std::chrono::milliseconds timeout, hailo_cpu_id_t cpu_id) -{ - CHECK_ARG_NOT_NULL(request); - CHECK_ARG_NOT_NULL(response); - CHECK_ARG_NOT_NULL(response_len); - CHECK(timeout.count() >= 0, HAILO_INVALID_ARGUMENT); - - tCompatibleHailoIoctlData data = {}; - hailo_fw_control& command = data.Buffer.FirmwareControl; - static_assert(PCIE_EXPECTED_MD5_LENGTH == sizeof(command.expected_md5), "mismatch md5 size"); - memcpy(&command.expected_md5, request_md5, sizeof(command.expected_md5)); - command.buffer_len = static_cast(request_len); - CHECK(request_len <= sizeof(command.buffer), HAILO_INVALID_ARGUMENT, - "FW control request len can't be larger than {} (size given {})", sizeof(command.buffer), request_len); - memcpy(&command.buffer, request, request_len); - command.timeout_ms = static_cast(timeout.count()); - command.cpu_id = translate_cpu_id(cpu_id); - - if (0 > ioctl(this->m_fd, HAILO_FW_CONTROL, 
&data)) { - LOGGER__ERROR("HAILO_FW_CONTROL failed with errno: {}", errno); - return HAILO_FW_CONTROL_FAILURE; - } - - if (*response_len < command.buffer_len) { - LOGGER__ERROR("FW control response len needs to be at least {} (size given {})", command.buffer_len, *response_len); - *response_len = command.buffer_len; - return HAILO_INSUFFICIENT_BUFFER; - } - memcpy(response, command.buffer, command.buffer_len); - *response_len = command.buffer_len; - memcpy(response_md5, command.expected_md5, PCIE_EXPECTED_MD5_LENGTH); - - return HAILO_SUCCESS; -} - -hailo_status read_log(uint8_t *buffer, size_t buffer_size, size_t *read_bytes, hailo_cpu_id_t cpu_id) -{ - (void)buffer; - (void)buffer_size; - (void)read_bytes; - (void)cpu_id; - return HAILO_PCIE_NOT_SUPPORTED_ON_PLATFORM; -} - -Expected HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size, DmaDirection data_direction, - const vdma_mapped_buffer_driver_identifier &driver_buff_handle) -{ - tCompatibleHailoIoctlData data = {}; - hailo_vdma_buffer_map_params& map_user_buffer_info = data.Buffer.VdmaBufferMap; - map_user_buffer_info.user_address = user_address; - map_user_buffer_info.size = required_size; - map_user_buffer_info.data_direction = direction_to_dma_data_direction(data_direction); - map_user_buffer_info.allocated_buffer_handle = driver_buff_handle; - map_user_buffer_info.mapped_handle = 0; - - if (0 > ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &data)) { - LOGGER__ERROR("Failed to map user buffer with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return std::move(map_user_buffer_info.mapped_handle); -} - -hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) -{ - tCompatibleHailoIoctlData data = {}; - hailo_vdma_buffer_unmap_params& unmap_user_buffer_info = data.Buffer.VdmaBufferUnmap; - unmap_user_buffer_info.mapped_handle = handle; - if (0 > ioctl(this->m_fd, HAILO_VDMA_BUFFER_UNMAP, &data)) { - LOGGER__ERROR("Failed to unmap user buffer with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -Expected HailoRTDriver::descriptors_list_create(size_t desc_count, bool is_circular) -{ - auto handle_to_dma_address_pair = descriptors_list_create_ioctl(desc_count, is_circular); - CHECK_EXPECTED(handle_to_dma_address_pair); - - const auto desc_handle = handle_to_dma_address_pair->first; - const auto dma_address = handle_to_dma_address_pair->second; - - auto user_address = descriptors_list_create_mmap(desc_handle, desc_count); - if (!user_address) { - auto status = descriptors_list_release_ioctl(desc_handle); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed releasing descriptors list, status {}", status); - // continue - } - return make_unexpected(user_address.status()); - } - - return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()}; -} - -hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info) -{ - hailo_status status = HAILO_SUCCESS; - - auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count); - if (HAILO_SUCCESS != unmap_status) { - LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status); - status = unmap_status; - // continue - } - - auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle); - if (HAILO_SUCCESS != release_status) { - LOGGER__ERROR("Descriptors list release status failed with {}", release_status); - status = release_status; - // continue - } - - 
return status; -} - -Expected> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count, bool is_circular) -{ - tCompatibleHailoIoctlData data = {}; - hailo_desc_list_create_params& create_desc_info = data.Buffer.DescListCreate; - create_desc_info.desc_count = desc_count; - create_desc_info.is_circular = is_circular; - - if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_CREATE, &data)) { - LOGGER__ERROR("Failed to create descriptors list with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return std::move(std::make_pair(create_desc_info.desc_handle, create_desc_info.dma_address)); -} - -hailo_status HailoRTDriver::descriptors_list_release_ioctl(uintptr_t desc_handle) -{ - tCompatibleHailoIoctlData data = {}; - uintptr_t& release_desc_info = data.Buffer.DescListReleaseParam; - release_desc_info = desc_handle; - if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_RELEASE, &data)) { - LOGGER__ERROR("Failed to release descriptors list with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -Expected HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count) -{ - tCompatibleHailoIoctlData data = {}; - data.Buffer.DescListMmap.desc_handle = desc_handle; - data.Buffer.DescListMmap.size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR; - if (0 > ioctl(m_fd, HAILO_NON_LINUX_DESC_LIST_MMAP, &data)) { - LOGGER__ERROR("Failed to map physical memory with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - void *user_address = data.Buffer.DescListMmap.user_address; - return user_address; -} - -hailo_status HailoRTDriver::descriptors_list_create_munmap(void *, size_t ) -{ - // On windows, the unmap is done on the release ioctl - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle, - uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc) -{ - tCompatibleHailoIoctlData data = {}; - hailo_desc_list_bind_vdma_buffer_params& config_info = data.Buffer.DescListBind; - config_info.buffer_handle = buffer_handle; - config_info.desc_handle = desc_handle; - config_info.desc_page_size = desc_page_size; - config_info.channel_index = channel_index; - config_info.starting_desc = starting_desc; - - if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_BIND_VDMA_BUFFER, &data)) { - LOGGER__ERROR("Failed to bind vdma buffer to descriptors list with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::read_log(uint8_t *buffer, size_t buffer_size, size_t *read_bytes, hailo_cpu_id_t cpu_id) -{ - tCompatibleHailoIoctlData data = {}; - hailo_read_log_params& params = data.Buffer.ReadLog; - params.buffer_size = __min(buffer_size, sizeof(params.buffer)); - params.cpu_id = translate_cpu_id(cpu_id); - - CHECK_ARG_NOT_NULL(buffer); - CHECK_ARG_NOT_NULL(read_bytes); - - if (0 > ioctl(this->m_fd, HAILO_READ_LOG, &data)) { - LOGGER__ERROR("Failed to read log with errno:{}", errno); - return HAILO_DRIVER_FAIL; - } - - CHECK(params.read_bytes <= sizeof(params.buffer), HAILO_DRIVER_FAIL, - "Amount of bytes read from log {} is bigger than size of buffer {}", - params.read_bytes, sizeof(params.buffer)); - - memcpy(buffer, params.buffer, params.read_bytes); - *read_bytes = params.read_bytes; - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::reset_nn_core() -{ - LOGGER__ERROR("Reset nn core is not supported over the windows driver"); - return HAILO_NOT_IMPLEMENTED; -} - -Expected 
HailoRTDriver::vdma_low_memory_buffer_alloc(size_t size) { - (void) size; - return make_unexpected(HAILO_INVALID_OPERATION); -} - - -hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t buffer_handle) { - (void) buffer_handle; - return HAILO_INVALID_OPERATION; -} - -Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t size) -{ - (void) size; - return make_unexpected(HAILO_INVALID_OPERATION); -} - -hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &buffer_info) -{ - (void) buffer_info; - return HAILO_INVALID_OPERATION; -} - -hailo_status HailoRTDriver::mark_as_used() -{ - tCompatibleHailoIoctlData data = {}; - if (0 > ioctl(this->m_fd, HAILO_MARK_AS_IN_USE, &data)) { - LOGGER__ERROR("Failed to mark device as in use with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - if (data.Buffer.MarkAsInUse.in_use) { - return HAILO_DEVICE_IN_USE; - } - return HAILO_SUCCESS; -} - -// TODO: HRT-7309 merge with posix -bool HailoRTDriver::is_valid_channel_id(const vdma::ChannelId &channel_id) -{ - return (channel_id.engine_index < m_dma_engines_count) && (channel_id.channel_index < MAX_VDMA_CHANNELS_PER_ENGINE); -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/service/hailort_rpc_client.cpp b/hailort/libhailort/src/service/hailort_rpc_client.cpp index 91db64d4..85acf5fa 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.cpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.cpp @@ -11,14 +11,14 @@ #include "hef/hef_internal.hpp" #include "hailort_rpc_client.hpp" -#include "net_flow/ops/yolov8_post_process.hpp" -#include "net_flow/ops/yolox_post_process.hpp" -#include "net_flow/ops/ssd_post_process.hpp" -#include "net_flow/ops/softmax_post_process.hpp" -#include "net_flow/ops/argmax_post_process.hpp" -#include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/yolov5_op_metadata.hpp" -#include "net_flow/ops/yolov5_seg_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov8_op_metadata.hpp" +#include "net_flow/ops_metadata/yolox_op_metadata.hpp" +#include "net_flow/ops_metadata/ssd_op_metadata.hpp" +#include "net_flow/ops_metadata/softmax_op_metadata.hpp" +#include "net_flow/ops_metadata/argmax_op_metadata.hpp" +#include "net_flow/ops_metadata/nms_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp" #include @@ -82,7 +82,12 @@ hailo_status HailoRtRpcClient::VDevice_release(const VDeviceIdentifier &identifi request.set_pid(pid); Release_Reply reply; - ClientContextWithTimeout context; + // Note: In apps with multiple devices and multiple networks, there are many mapped buffers for each device. + // Therefore, the release of the devices might take a longer time to finish un-mapping all the buffers, + // so we increase the timeout for the VDevice_release context.
+ // TODO: HRT-13274 + const std::chrono::minutes release_timeout(2); + ClientContextWithTimeout context(release_timeout); grpc::Status status = m_stub->VDevice_release(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -803,7 +808,7 @@ Expected create_yolov5seg_post_pr { auto yolov5seg_config_proto = op_metadata_proto.yolov5seg_config(); hailort::net_flow::YoloV5SegPostProcessConfig yolov5seg_post_process_config = {yolov5seg_config_proto.mask_threshold(), - yolov5seg_config_proto.layer_name()}; + yolov5seg_config_proto.max_accumulated_mask_size(), yolov5seg_config_proto.layer_name()}; return yolov5seg_post_process_config; } @@ -1051,7 +1056,7 @@ hailo_vstream_info_t deserialize_vstream_info(const ProtoVStreamInfo &info_proto hailo_nms_shape_t nms_shape = { info_proto.nms_shape().number_of_classes(), info_proto.nms_shape().max_bbox_per_class(), - info_proto.nms_shape().max_mask_size() + info_proto.nms_shape().max_accumulated_mask_size() }; info.nms_shape = nms_shape; } else { @@ -1395,6 +1400,23 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_max_bboxes_per_cla return static_cast(reply.status()); } +hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(const NetworkGroupIdentifier &identifier, + const std::string &edge_name, uint32_t max_accumulated_mask_size) +{ + ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request request; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + request.set_edge_name(edge_name); + request.set_max_accumulated_mask_size(max_accumulated_mask_size); + + ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply reply; + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(const NetworkGroupIdentifier &identifier, const std::string &vstream_name) { @@ -1454,7 +1476,6 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_infer_async(const NetworkG } else { proto_transfer_request.set_direction(HAILO_D2H_STREAM); } - proto_transfer_request.set_size(static_cast(std::get<2>(idx_named_buffer).size())); proto_transfer_buffers->Add(std::move(proto_transfer_request)); } request.set_infer_request_done_cb_idx(infer_request_done_cb); @@ -1462,7 +1483,7 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_infer_async(const NetworkG ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_infer_async(&context, request, &reply); assert(reply.status() < HAILO_STATUS_COUNT); - if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + if (reply.status() == HAILO_STREAM_ABORT) { return static_cast(reply.status()); } CHECK_GRPC_STATUS(status); @@ -1488,6 +1509,8 @@ Expected HailoRtRpcClient::InputVStream_is_multi_planar(const VStreamIdent hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &identifier, const hailo_pix_buffer_t &buffer) { + CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer.memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!"); + InputVStream_write_pix_Request request; auto proto_identifier = request.mutable_identifier(); VStream_convert_identifier_to_proto(identifier, 
proto_identifier); @@ -1502,7 +1525,7 @@ hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &ident grpc::Status status = m_stub->InputVStream_write_pix(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); - if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + if (reply.status() == HAILO_STREAM_ABORT) { return static_cast(reply.status()); } CHECK_SUCCESS(static_cast(reply.status())); @@ -1521,7 +1544,7 @@ hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &ident grpc::Status status = m_stub->InputVStream_write(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); - if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + if (reply.status() == HAILO_STREAM_ABORT) { return static_cast(reply.status()); } CHECK_SUCCESS(static_cast(reply.status())); @@ -1540,7 +1563,7 @@ hailo_status HailoRtRpcClient::OutputVStream_read(const VStreamIdentifier &ident grpc::Status status = m_stub->OutputVStream_read(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); - if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + if (reply.status() == HAILO_STREAM_ABORT) { return static_cast(reply.status()); } CHECK_SUCCESS(static_cast(reply.status())); @@ -1921,6 +1944,21 @@ hailo_status HailoRtRpcClient::OutputVStream_set_nms_max_proposals_per_class(con return static_cast(reply.status()); } +hailo_status HailoRtRpcClient::OutputVStream_set_nms_max_accumulated_mask_size(const VStreamIdentifier &identifier, uint32_t max_accumulated_mask_size) +{ + VStream_set_nms_max_accumulated_mask_size_Request request; + auto proto_identifier = request.mutable_identifier(); + VStream_convert_identifier_to_proto(identifier, proto_identifier); + request.set_max_accumulated_mask_size(max_accumulated_mask_size); + + ClientContextWithTimeout context; + VStream_set_nms_max_accumulated_mask_size_Reply reply; + grpc::Status status = m_stub->OutputVStream_set_nms_max_accumulated_mask_size(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + void HailoRtRpcClient::VDevice_convert_identifier_to_proto(const VDeviceIdentifier &identifier, ProtoVDeviceIdentifier *proto_identifier) { proto_identifier->set_vdevice_handle(identifier.m_vdevice_handle); diff --git a/hailort/libhailort/src/service/hailort_rpc_client.hpp b/hailort/libhailort/src/service/hailort_rpc_client.hpp index 7be961d5..2f83319d 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.hpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.hpp @@ -40,9 +40,9 @@ using callback_idx_t = uint32_t; class ClientContextWithTimeout : public grpc::ClientContext { public: - ClientContextWithTimeout() + ClientContextWithTimeout(const std::chrono::milliseconds context_timeout = CONTEXT_TIMEOUT) { - set_deadline(std::chrono::system_clock::now() + CONTEXT_TIMEOUT); + set_deadline(std::chrono::system_clock::now() + context_timeout); } }; @@ -98,6 +98,7 @@ class HailoRtRpcClient final { hailo_status ConfiguredNetworkGroup_set_nms_score_threshold(const NetworkGroupIdentifier &identifier, const std::string &edge_name, float32_t nms_score_th); hailo_status ConfiguredNetworkGroup_set_nms_iou_threshold(const NetworkGroupIdentifier &identifier, const std::string &edge_name, float32_t iou_th); hailo_status ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(const NetworkGroupIdentifier &identifier, const std::string 
&edge_name, uint32_t max_bboxes); + hailo_status ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(const NetworkGroupIdentifier &identifier, const std::string &edge_name, uint32_t max_accumulated_mask_size); Expected<std::vector<std::string>> ConfiguredNetworkGroup_get_stream_names_from_vstream_name(const NetworkGroupIdentifier &identifier, const std::string &vstream_name); Expected<std::vector<std::string>> ConfiguredNetworkGroup_get_vstream_names_from_stream_name(const NetworkGroupIdentifier &identifier, const std::string &stream_name); hailo_status ConfiguredNetworkGroup_infer_async(const NetworkGroupIdentifier &identifier, @@ -150,6 +151,7 @@ class HailoRtRpcClient final { hailo_status OutputVStream_set_nms_score_threshold(const VStreamIdentifier &identifier, float32_t threshold); hailo_status OutputVStream_set_nms_iou_threshold(const VStreamIdentifier &identifier, float32_t threshold); hailo_status OutputVStream_set_nms_max_proposals_per_class(const VStreamIdentifier &identifier, uint32_t max_proposals_per_class); + hailo_status OutputVStream_set_nms_max_accumulated_mask_size(const VStreamIdentifier &identifier, uint32_t max_accumulated_mask_size); private: void VDevice_convert_identifier_to_proto(const VDeviceIdentifier &identifier, ProtoVDeviceIdentifier *proto_identifier); diff --git a/hailort/libhailort/src/service/network_group_client.cpp b/hailort/libhailort/src/service/network_group_client.cpp index 89b32ecd..06c9dbd2 100644 --- a/hailort/libhailort/src/service/network_group_client.cpp +++ b/hailort/libhailort/src/service/network_group_client.cpp @@ -14,7 +14,7 @@ #include "common/os_utils.hpp" #include "network_group/network_group_internal.hpp" -#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/vstream_builder.hpp" #include "net_flow/ops/nms_post_process.hpp" #include "rpc_client_utils.hpp" @@ -78,6 +78,11 @@ ConfiguredNetworkGroupClient::~ConfiguredNetworkGroupClient() if (reply != HAILO_SUCCESS) { LOGGER__CRITICAL("ConfiguredNetworkGroup_release failed with status: {}", reply); } + execute_callbacks_on_error(HAILO_INTERNAL_FAILURE); // At this point there shouldn't be any callbacks left. If there are any, raise HAILO_INTERNAL_FAILURE + auto status = wait_for_ongoing_callbacks_count_under(1); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Failed to wait for callbacks to finish"); + } } hailo_status ConfiguredNetworkGroupClient::before_fork() @@ -212,7 +217,12 @@ hailo_status ConfiguredNetworkGroupClient::wait_for_activation(const std::chrono hailo_status ConfiguredNetworkGroupClient::shutdown() { - return m_client->ConfiguredNetworkGroup_shutdown(m_identifier); + auto status = m_client->ConfiguredNetworkGroup_shutdown(m_identifier); + CHECK_SUCCESS(status, "Failed to shutdown"); + status = wait_for_ongoing_callbacks_count_under(1); + CHECK_SUCCESS(status, "Failed to wait for callbacks to finish"); + + return status; } Expected<std::vector<std::vector<std::string>>> ConfiguredNetworkGroupClient::get_output_vstream_groups() @@ -413,6 +423,11 @@ hailo_status ConfiguredNetworkGroupClient::set_nms_max_bboxes_per_class(const st return m_client->ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(m_identifier, edge_name, max_bboxes_per_class); } +hailo_status ConfiguredNetworkGroupClient::set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) +{ + return m_client->ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(m_identifier, edge_name, max_accumulated_mask_size); +} + hailo_status ConfiguredNetworkGroupClient::execute_callback(const ProtoCallbackIdentifier &cb_id) { if (cb_id.cb_type() == CALLBACK_TYPE_TRANSFER) { @@ -427,6 +442,19 @@ hailo_status ConfiguredNetworkGroupClient::execute_callback(const ProtoCallbackI return HAILO_SUCCESS; } +void ConfiguredNetworkGroupClient::execute_callbacks_on_error(hailo_status error_status) +{ + std::unique_lock<std::mutex> lock(m_mutex); + for (auto cb_pair : m_idx_to_callbacks) { + std::get<2>(*cb_pair.second)(error_status); + } + m_idx_to_callbacks.clear(); + for (auto cb_pair : m_infer_request_idx_to_callbacks) { + cb_pair.second(error_status); + } + m_infer_request_idx_to_callbacks.clear(); +} + hailo_status ConfiguredNetworkGroupClient::execute_infer_request_callback(const ProtoCallbackIdentifier &cb_id) { std::function<void(hailo_status)> cb; @@ -485,7 +513,7 @@ hailo_status ConfiguredNetworkGroupClient::infer_async(const NamedBuffersCallbac } auto infer_request_callback = [this, infer_request_done_cb](hailo_status status){ - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { LOGGER__INFO("Infer request was aborted by user"); } else if (status != HAILO_SUCCESS) { @@ -503,10 +531,22 @@ hailo_status ConfiguredNetworkGroupClient::infer_async(const NamedBuffersCallbac m_infer_request_idx_to_callbacks.emplace(infer_request_cb_idx, infer_request_callback); } - increase_ongoing_callbacks(); + increase_ongoing_callbacks(); // Increase before launch, as the cb may be called before we get the chance to increase the counter auto status = m_client->ConfiguredNetworkGroup_infer_async(m_identifier, cb_idx_to_stream_buffer, infer_request_cb_idx, m_input_streams_names); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + + if (HAILO_SUCCESS != status) { + // If we got an error in `infer_async()`, then the callbacks will not be called in the service domain. + // Remove them from the cb lists so they won't be called in the client domain as well.
+ std::unique_lock<std::mutex> lock(m_mutex); + for (auto &pair : cb_idx_to_stream_buffer) { + m_idx_to_callbacks.erase(std::get<0>(pair)); + } + m_infer_request_idx_to_callbacks.erase(infer_request_cb_idx); + decrease_ongoing_callbacks(); + } + + if (status == HAILO_STREAM_ABORT) { LOGGER__INFO("Infer request was aborted by user"); return status; } diff --git a/hailort/libhailort/src/stream_common/async_stream_base.cpp b/hailort/libhailort/src/stream_common/async_stream_base.cpp index 0968aee8..e2abd420 100644 --- a/hailort/libhailort/src/stream_common/async_stream_base.cpp +++ b/hailort/libhailort/src/stream_common/async_stream_base.cpp @@ -172,7 +172,7 @@ hailo_status AsyncInputStreamBase::write_async(TransferRequest &&transfer_reques std::unique_lock<std::mutex> lock(m_stream_mutex); if (m_is_aborted) { - return HAILO_STREAM_ABORTED_BY_USER; + return HAILO_STREAM_ABORT; } else if (!m_is_stream_activated) { return HAILO_STREAM_NOT_ACTIVATED; } @@ -187,6 +187,12 @@ hailo_status AsyncInputStreamBase::activate_stream() auto status = activate_stream_impl(); CHECK_SUCCESS(status); + // If the mode is set to OWNING, it means we use the write/write_impl API. We want to make sure the buffer starts + // from the beginning of the buffer pool (to avoid unnecessary buffer bindings). + if (StreamBufferMode::OWNING == m_buffer_mode) { + m_buffer_pool->reset_pointers(); + } + m_is_stream_activated = true; return HAILO_SUCCESS; @@ -231,7 +237,7 @@ hailo_status AsyncInputStreamBase::call_write_async_impl(TransferRequest &&trans auto status = write_async_impl(std::move(transfer_request)); - if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) { + if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORT == status)) { return status; } CHECK_SUCCESS(status); @@ -307,7 +313,7 @@ hailo_status AsyncOutputStreamBase::read_async(TransferRequest &&transfer_reques std::unique_lock<std::mutex> lock(m_stream_mutex); if (m_is_aborted) { - return HAILO_STREAM_ABORTED_BY_USER; + return HAILO_STREAM_ABORT; } else if (!m_is_stream_activated) { return HAILO_STREAM_NOT_ACTIVATED; } @@ -329,7 +335,7 @@ hailo_status AsyncOutputStreamBase::call_read_async_impl(TransferRequest &&trans }; auto status = read_async_impl(std::move(transfer_request)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return status; } CHECK_SUCCESS(status); @@ -464,7 +470,7 @@ hailo_status AsyncOutputStreamBase::read_impl(MemoryView user_buffer) CHECK_SUCCESS(status); status = dequeue_and_launch_transfer(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { // The buffer_pool state will reset on next activation. return status; } @@ -479,7 +485,7 @@ hailo_status AsyncOutputStreamBase::dequeue_and_launch_transfer() CHECK_EXPECTED_AS_STATUS(buffer); auto callback = [this, buffer=buffer.value()](hailo_status status) { - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { // On deactivation flow, we should get this status. We just ignore the callback here, and in the next // activation we should reset the buffers. return; @@ -492,7 +498,7 @@ }; auto status = call_read_async_impl(TransferRequest(std::move(buffer.value()), callback)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { // The buffer_pool state will reset on next activation.
return status; } diff --git a/hailort/libhailort/src/stream_common/async_stream_base.hpp b/hailort/libhailort/src/stream_common/async_stream_base.hpp index 48640a31..739bfef6 100644 --- a/hailort/libhailort/src/stream_common/async_stream_base.hpp +++ b/hailort/libhailort/src/stream_common/async_stream_base.hpp @@ -66,7 +66,7 @@ class AsyncInputStreamBase : public InputStreamBase { const auto wait_done = m_has_ready_buffer.wait_for(lock, timeout, [this, pred, &status] { if (m_is_aborted) { - status = HAILO_STREAM_ABORTED_BY_USER; + status = HAILO_STREAM_ABORT; return true; } @@ -151,7 +151,7 @@ class AsyncOutputStreamBase : public OutputStreamBase { const auto wait_done = m_has_ready_buffer.wait_for(lock, timeout, [this, pred, &status] { if (m_is_aborted) { - status = HAILO_STREAM_ABORTED_BY_USER; + status = HAILO_STREAM_ABORT; return true; } diff --git a/hailort/libhailort/src/stream_common/nms_stream.cpp b/hailort/libhailort/src/stream_common/nms_stream.cpp index 725530b2..b09be229 100644 --- a/hailort/libhailort/src/stream_common/nms_stream.cpp +++ b/hailort/libhailort/src/stream_common/nms_stream.cpp @@ -173,7 +173,7 @@ hailo_status NMSStreamReader::read_nms_bbox_mode(OutputStreamBase &stream, void while (true) { MemoryView buffer_view(static_cast<uint8_t*>(buffer) + offset, bbox_size); auto status = stream.read_impl(buffer_view); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || + if ((HAILO_STREAM_ABORT == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { return status; } @@ -189,7 +189,7 @@ } class_bboxes_count++; - CHECK_IN_DEBUG(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, + CHECK(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count, stream.get_info().nms_info.max_bboxes_per_class); offset += bbox_size; @@ -203,7 +203,7 @@ hailo_status NMSStreamReader::read_nms_bbox_mode(OutputStreamBase &stream, void // last class delimiter) uint64_t last_bbox = 0; auto status = stream.read_impl(MemoryView(&last_bbox, sizeof(last_bbox))); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || + if ((HAILO_STREAM_ABORT == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { return status; } @@ -249,7 +249,7 @@ hailo_status NMSStreamReader::read_nms_burst_mode(OutputStreamBase &stream, void assert(offset + transfer_size <= buffer_size); current_burst = MemoryView(static_cast<uint8_t*>(buffer) + offset, transfer_size); auto status = stream.read_impl(current_burst); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { + if ((HAILO_STREAM_ABORT == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { return status; } CHECK_SUCCESS(status, "Failed reading nms burst"); @@ -290,7 +290,7 @@ hailo_status NMSStreamReader::read_nms_burst_mode(OutputStreamBase &stream, void } class_bboxes_count++; - CHECK_IN_DEBUG(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, + CHECK(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count, stream.get_info().nms_info.max_bboxes_per_class);
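The HAILO_STREAM_ABORTED_BY_USER to HAILO_STREAM_ABORT rename running through these hunks is mechanical, but any caller branching on the old enumerator has to follow it. A hypothetical call site mirroring the handling pattern the diff itself uses (abort and deactivation are expected flows, not errors):

    hailo_status launch_read(AsyncOutputStreamBase &stream, TransferRequest &&transfer_request)
    {
        auto status = stream.read_async(std::move(transfer_request));
        if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) {
            return status; // expected during abort/deactivation - propagate without logging an error
        }
        CHECK_SUCCESS(status);
        return HAILO_SUCCESS;
    }

@@ -303,17 +303,22 @@ return HAILO_SUCCESS; } -hailo_status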
NMSStreamReader::read_nms(OutputStreamBase &stream, void *buffer, size_t offset, size_t size) +hailo_status NMSStreamReader::read_nms(OutputStreamBase &stream, void *buffer, size_t offset, size_t size, + hailo_stream_interface_t stream_interface) { hailo_status status = HAILO_UNINITIALIZED; const auto burst_type = stream.get_layer_info().nms_info.burst_type; const bool is_burst_mode = (HAILO_BURST_TYPE_H8_BBOX != burst_type) && (HAILO_BURST_TYPE_H15_BBOX != burst_type); + // Burst mode is not supported over Ethernet - return an error in this case + CHECK(!(is_burst_mode && (HAILO_STREAM_INTERFACE_ETH == stream_interface)), HAILO_NOT_SUPPORTED, + "NMS Burst mode is not supported in Ethernet interface"); + if (is_burst_mode) { status = NMSStreamReader::read_nms_burst_mode(stream, buffer, offset, size); } else { status = NMSStreamReader::read_nms_bbox_mode(stream, buffer, offset); } - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { return status; } CHECK_SUCCESS(status, "Failed reading nms"); @@ -322,11 +327,12 @@ hailo_status NMSStreamReader::read_nms(OutputStreamBase &stream, void *buffer, s } Expected<std::shared_ptr<NmsOutputStream>> NmsOutputStream::create(std::shared_ptr<OutputStreamBase> base_stream, - const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event) + const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event, + hailo_stream_interface_t stream_interface) { auto status = HAILO_UNINITIALIZED; auto nms_stream = make_shared_nothrow<NmsOutputStream>(base_stream, edge_layer, max_queue_size, - std::move(core_op_activated_event), status); + std::move(core_op_activated_event), stream_interface, status); CHECK_NOT_NULL_AS_EXPECTED(nms_stream, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -387,10 +393,12 @@ hailo_status NmsOutputStream::cancel_pending_transfers() return m_base_stream->cancel_pending_transfers(); } -NmsReaderThread::NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size) : +NmsReaderThread::NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size, + hailo_stream_interface_t stream_interface) : m_base_stream(base_stream), m_queue_max_size(max_queue_size), m_should_quit(false), + m_stream_interface(stream_interface), m_worker_thread([this] { process_transfer_requests(); }) {} @@ -472,12 +480,12 @@ void NmsReaderThread::process_transfer_requests() assert(1 == transfer_request.transfer_buffers.size()); assert(0 == transfer_request.transfer_buffers[0].offset()); auto buffer = transfer_request.transfer_buffers[0].base_buffer(); - auto status = NMSStreamReader::read_nms(*m_base_stream, buffer->data(), 0, buffer->size()); + auto status = NMSStreamReader::read_nms(*m_base_stream, buffer.data(), 0, buffer.size(), m_stream_interface); - if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) { - // On both deactivation/abort, we want to send HAILO_STREAM_ABORTED_BY_USER since it is part of the callback + if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORT == status)) { + // On both deactivation/abort, we want to send HAILO_STREAM_ABORT since it is part of the callback // API. - transfer_request.callback(HAILO_STREAM_ABORTED_BY_USER); + transfer_request.callback(HAILO_STREAM_ABORT); } else { transfer_request.callback(status); } @@ -490,7 +498,7 @@ void NmsReaderThread::cancel_pending_transfers() while(!m_queue.empty()) { auto transfer_request = m_queue.front(); m_queue.pop(); - transfer_request.callback(HAILO_STREAM_ABORTED_BY_USER); + transfer_request.callback(HAILO_STREAM_ABORT); } }
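The new guard rejects burst-mode NMS reads on Ethernet before any data is read. A standalone sketch of the decision logic (helper name illustrative; the real check lives in NMSStreamReader::read_nms above):

```cpp
// Returns true when this combination of burst type and stream interface can be served.
bool nms_read_supported(hailo_nms_burst_type_t burst_type, hailo_stream_interface_t interface)
{
    // Only the plain bbox-per-transfer types bypass burst mode.
    const bool is_burst_mode = (HAILO_BURST_TYPE_H8_BBOX != burst_type) &&
        (HAILO_BURST_TYPE_H15_BBOX != burst_type);
    // Burst-mode reads are not defined over Ethernet; bbox mode works on all interfaces.
    return !(is_burst_mode && (HAILO_STREAM_INTERFACE_ETH == interface));
}
```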
diff --git a/hailort/libhailort/src/stream_common/nms_stream.hpp b/hailort/libhailort/src/stream_common/nms_stream.hpp index 4242b283..9b11ef37 100644 --- a/hailort/libhailort/src/stream_common/nms_stream.hpp +++ b/hailort/libhailort/src/stream_common/nms_stream.hpp @@ -34,7 +34,8 @@ enum class NMSBurstState { // For an explanation of the different burst modes and types, and of the class's state machine and logic, see the cpp file. class NMSStreamReader { public: - static hailo_status read_nms(OutputStreamBase &stream, void *buffer, size_t offset, size_t size); + static hailo_status read_nms(OutputStreamBase &stream, void *buffer, size_t offset, size_t size, + hailo_stream_interface_t stream_interface); private: static hailo_status read_nms_bbox_mode(OutputStreamBase &stream, void *buffer, size_t offset); static hailo_status read_nms_burst_mode(OutputStreamBase &stream, void *buffer, size_t offset, size_t buffer_size); @@ -46,7 +47,8 @@ class NMSStreamReader { class NmsReaderThread final { public: - NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size); + NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size, + hailo_stream_interface_t stream_interface); ~NmsReaderThread(); NmsReaderThread(const NmsReaderThread &) = delete; @@ -71,6 +73,7 @@ class NmsReaderThread final { std::queue<TransferRequest> m_queue; // m_should_quit is used to quit the thread (called on destruction) bool m_should_quit; + hailo_stream_interface_t m_stream_interface; std::thread m_worker_thread; }; @@ -80,15 +83,16 @@ class NmsReaderThread final { class NmsOutputStream : public AsyncOutputStreamBase { public: static Expected<std::shared_ptr<NmsOutputStream>> create(std::shared_ptr<OutputStreamBase> base_stream, - const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event); + const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event, + hailo_stream_interface_t stream_interface); virtual hailo_stream_interface_t get_interface() const override; NmsOutputStream(std::shared_ptr<OutputStreamBase> base_stream, const LayerInfo &edge_layer, size_t max_queue_size, - EventPtr core_op_activated_event, hailo_status &status) : + EventPtr core_op_activated_event, hailo_stream_interface_t stream_interface, hailo_status &status) : AsyncOutputStreamBase(edge_layer, std::move(core_op_activated_event), status), m_base_stream(base_stream), - m_reader_thread(base_stream, max_queue_size) + m_reader_thread(base_stream, max_queue_size, stream_interface) {} void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) override; diff --git a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp index e93de5af..3aadb450 100644 --- a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp +++ b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp @@ -31,7 +31,7 @@ QueuedStreamBufferPool::QueuedStreamBufferPool(std::vector<BufferPtr> &&storage) m_storage(std::move(storage)) { for (auto buffer : m_storage) { - m_queue.push(buffer); + m_queue.push(MemoryView(*buffer)); } } @@ -40,6 +40,15 @@ size_t QueuedStreamBufferPool::max_queue_size()
const return m_storage.size(); } +hailo_status QueuedStreamBufferPool::dma_map(VDevice &vdevice, hailo_dma_buffer_direction_t direction) +{ + for (auto &buffer : m_storage) { + TRY(auto mapping, DmaMappedBuffer::create(vdevice, buffer->data(), buffer->size(), direction)); + m_dma_mappings.emplace_back(std::move(mapping)); + } + return HAILO_SUCCESS; +} + Expected<TransferBuffer> QueuedStreamBufferPool::dequeue() { CHECK_AS_EXPECTED(!m_queue.empty(), HAILO_INTERNAL_FAILURE, "QueuedStreamBufferPool is empty"); @@ -53,7 +62,7 @@ hailo_status QueuedStreamBufferPool::enqueue(TransferBuffer &&buffer_info) { CHECK(buffer_info.offset() == 0, HAILO_INTERNAL_FAILURE, "Cant use offset on queued buffer pool"); CHECK(buffer_info.size() == m_storage[0]->size(), HAILO_INTERNAL_FAILURE, "Invalid enqueue buffer size"); - CHECK(buffer_info.base_buffer()->data() == m_storage[m_next_enqueue_buffer_index]->data(), HAILO_INTERNAL_FAILURE, + CHECK(buffer_info.base_buffer().data() == m_storage[m_next_enqueue_buffer_index]->data(), HAILO_INTERNAL_FAILURE, "Out of order enqueue for queued stream buffer pool"); m_queue.push(buffer_info.base_buffer()); @@ -70,7 +79,7 @@ void QueuedStreamBufferPool::reset_pointers() // Now fill the buffers from the storage in the right order for (auto buffer : m_storage) { - m_queue.push(buffer); + m_queue.push(MemoryView(*buffer)); } m_next_enqueue_buffer_index = 0; } diff --git a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp index 373206d6..263a2034 100644 --- a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp +++ b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp @@ -11,6 +11,7 @@ #define _HAILO_QUEUED_STREAM_BUFFER_POOL_HPP_ #include "stream_common/stream_buffer_pool.hpp" +#include "hailo/dma_mapped_buffer.hpp" #include <queue> @@ -24,6 +25,8 @@ class QueuedStreamBufferPool : public StreamBufferPool { explicit QueuedStreamBufferPool(std::vector<BufferPtr> &&storage); + hailo_status dma_map(VDevice &vdevice, hailo_dma_buffer_direction_t direction); + virtual size_t max_queue_size() const override; virtual Expected<TransferBuffer> dequeue() override; virtual hailo_status enqueue(TransferBuffer &&buffer_info) override; @@ -33,7 +36,10 @@ class QueuedStreamBufferPool : public StreamBufferPool { // Hold the buffer storage, keeps all buffers alive. std::vector<BufferPtr> m_storage; - std::queue<BufferPtr> m_queue; + // Keeps mappings alive (only if dma_map was called). + std::vector<DmaMappedBuffer> m_dma_mappings; + + std::queue<MemoryView> m_queue; // Used for buffer enqueue order validation. size_t m_next_enqueue_buffer_index = 0;
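dma_map pre-maps every buffer in the pool exactly once, so the per-transfer path can reuse cached mappings instead of pinning memory on each write. A hedged usage sketch (buffer allocation and the surrounding stream setup are assumed):

```cpp
// buffers: std::vector<hailort::BufferPtr> of frame-size, dma-able Buffers (allocation not shown).
hailort::QueuedStreamBufferPool pool(std::move(buffers));

// Map the whole pool up front; m_dma_mappings keeps the DmaMappedBuffer objects
// (and therefore the mappings) alive for the pool's lifetime.
auto status = pool.dma_map(vdevice, HAILO_DMA_BUFFER_DIRECTION_H2D);
if (HAILO_SUCCESS != status) {
    // handle mapping failure
}
// dequeue()/enqueue() then cycle the buffers in a fixed order; reset_pointers()
// rewinds the queue to storage order on stream (re)activation.
```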
diff --git a/hailort/libhailort/src/stream_common/remote_process_stream.cpp b/hailort/libhailort/src/stream_common/remote_process_stream.cpp index f231a172..4ad2b504 100644 --- a/hailort/libhailort/src/stream_common/remote_process_stream.cpp +++ b/hailort/libhailort/src/stream_common/remote_process_stream.cpp @@ -221,8 +221,8 @@ hailo_status RemoteProcessInputStream::flush() // Get available buffer. We don't use the buffer, just use it to send flush request auto write_buffer = m_buffer_pool->dequeue_host_buffer(flush_timeout); - if (HAILO_STREAM_ABORTED_BY_USER == write_buffer.status()) { - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == write_buffer.status()) { + return HAILO_STREAM_ABORT; } CHECK_EXPECTED_AS_STATUS(write_buffer); @@ -235,8 +235,8 @@ hailo_status RemoteProcessInputStream::flush() // Now wait until available buffers is full status = m_buffer_pool->wait_until_host_queue_full(flush_timeout); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == status) { + return HAILO_STREAM_ABORT; } CHECK_SUCCESS(status); @@ -262,8 +262,8 @@ hailo_status RemoteProcessInputStream::write_impl(const MemoryView &buffer) { // Get available buffer auto write_buffer = m_buffer_pool->dequeue_host_buffer(m_timeout); - if (HAILO_STREAM_ABORTED_BY_USER == write_buffer.status()) { - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == write_buffer.status()) { + return HAILO_STREAM_ABORT; } CHECK_EXPECTED_AS_STATUS(write_buffer); @@ -300,8 +300,15 @@ RemoteProcessInputStream::RemoteProcessInputStream(std::shared_ptr<InputStreamBase> base_stream, - auto queue_size_exp = m_base_stream->get_async_max_queue_size(); - const auto queue_size = queue_size_exp ? *queue_size_exp : DEFAULT_QUEUE_SIZE; + auto queue_size = DEFAULT_QUEUE_SIZE; + if (HAILO_STREAM_INTERFACE_ETH != m_base_stream->get_interface() && HAILO_STREAM_INTERFACE_MIPI != m_base_stream->get_interface()) { + auto queue_size_exp = m_base_stream->get_async_max_queue_size(); + if (!queue_size_exp) { + status = queue_size_exp.status(); + return; + } + queue_size = *queue_size_exp; + } auto buffer_pool = RemoteProcessBufferPool::create(HAILO_H2D_STREAM, base_stream->get_frame_size(), queue_size); if (!buffer_pool) { @@ -332,7 +339,7 @@ void RemoteProcessInputStream::run_write_thread() } status = write_single_buffer(); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { continue; } else if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failure on read thread {}", status); @@ -473,8 +480,8 @@ hailo_status RemoteProcessOutputStream::cancel_pending_transfers() hailo_status RemoteProcessOutputStream::read_impl(MemoryView buffer) { auto read_buffer = m_buffer_pool->dequeue_host_buffer(m_timeout); - if (HAILO_STREAM_ABORTED_BY_USER == read_buffer.status()) { - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == read_buffer.status()) { + return HAILO_STREAM_ABORT; } CHECK_EXPECTED_AS_STATUS(read_buffer); @@ -509,8 +516,15 @@ RemoteProcessOutputStream::RemoteProcessOutputStream(std::shared_ptr<OutputStreamBase> base_stream, - auto queue_size_exp = m_base_stream->get_async_max_queue_size(); - auto queue_size = queue_size_exp ?
*queue_size_exp : DEFAULT_QUEUE_SIZE; + auto queue_size = DEFAULT_QUEUE_SIZE; + if (HAILO_STREAM_INTERFACE_ETH != m_base_stream->get_interface() && HAILO_STREAM_INTERFACE_MIPI != m_base_stream->get_interface()) { + auto queue_size_exp = m_base_stream->get_async_max_queue_size(); + if (!queue_size_exp) { + status = queue_size_exp.status(); + return; + } + queue_size = *queue_size_exp; + } auto buffer_pool = RemoteProcessBufferPool::create(HAILO_D2H_STREAM, base_stream->get_frame_size(), queue_size); if (!buffer_pool) { @@ -544,7 +558,7 @@ void RemoteProcessOutputStream::run_read_thread() } status = read_single_buffer(); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { continue; } else if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failure on read thread {}", status); diff --git a/hailort/libhailort/src/stream_common/remote_process_stream.hpp b/hailort/libhailort/src/stream_common/remote_process_stream.hpp index 2f6ccc78..d7087dcf 100644 --- a/hailort/libhailort/src/stream_common/remote_process_stream.hpp +++ b/hailort/libhailort/src/stream_common/remote_process_stream.hpp @@ -12,6 +12,7 @@ #ifndef _HAILO_REMOTE_PROCESS_STREAM_HPP_ #define _HAILO_REMOTE_PROCESS_STREAM_HPP_ +#include "common/event_internal.hpp" #include "common/fork_support.hpp" #include "stream_common/stream_internal.hpp" @@ -77,7 +78,7 @@ class RemoteProcessBufferPool final : public SharedAllocatedObject { }); CHECK(done, HAILO_TIMEOUT, "Timeout waiting on cond variable"); if (m_is_aborted) { - return HAILO_STREAM_ABORTED_BY_USER; + return HAILO_STREAM_ABORT; } return HAILO_SUCCESS; } diff --git a/hailort/libhailort/src/stream_common/stream.cpp b/hailort/libhailort/src/stream_common/stream.cpp index c2fa06a8..eaaa447b 100644 --- a/hailort/libhailort/src/stream_common/stream.cpp +++ b/hailort/libhailort/src/stream_common/stream.cpp @@ -31,7 +31,6 @@ hailo_status InputStream::wait_for_async_ready(size_t /* transfer_size */, std:: Expected<size_t> InputStream::get_async_max_queue_size() const { - LOGGER__ERROR("get_async_max_queue_size not implemented for sync API"); return make_unexpected(HAILO_NOT_IMPLEMENTED); } @@ -57,7 +56,6 @@ hailo_status OutputStream::wait_for_async_ready(size_t /* transfer_size */, std: Expected<size_t> OutputStream::get_async_max_queue_size() const { - LOGGER__ERROR("get_async_max_queue_size not implemented for sync API"); return make_unexpected(HAILO_NOT_IMPLEMENTED); } diff --git a/hailort/libhailort/src/stream_common/stream_internal.cpp b/hailort/libhailort/src/stream_common/stream_internal.cpp index eb0de7bc..520ab934 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.cpp +++ b/hailort/libhailort/src/stream_common/stream_internal.cpp @@ -15,6 +15,8 @@ #include "common/logger_macros.hpp" #include "common/os_utils.hpp" +#include "utils/buffer_storage.hpp" + #include "stream_common/stream_internal.hpp" @@ -37,35 +39,18 @@ hailo_status InputStreamBase::write(const void *buffer, size_t size) return write(MemoryView::create_const(buffer, size)); } -hailo_status InputStreamBase::write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) -{ - CHECK_ARG_NOT_NULL(buffer); - CHECK_ARG_NOT_NULL(buffer->data()); - CHECK(buffer->size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Write size {} must be frame size {}", buffer->size(), - get_frame_size()); - - auto wrapped_callback = [buffer, user_callback](hailo_status status) { - user_callback(CompletionInfo{status,
buffer->data(), buffer->size()}); - }; - return write_async(TransferRequest(std::move(buffer), wrapped_callback)); -} - hailo_status InputStreamBase::write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) { + CHECK(!buffer.empty(), HAILO_INVALID_ARGUMENT, "Invalid buffer was passed to write_async"); CHECK(0 == (reinterpret_cast<size_t>(buffer.data()) % HailoRTCommon::HW_DATA_ALIGNMENT), HAILO_INVALID_ARGUMENT, "User address must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); + CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Write size {} must be frame size {}", + buffer.size(), get_frame_size()); - const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); - // User address is not aligned to page size - if ((0 != (reinterpret_cast<size_t>(buffer.data()) % dma_able_alignment))) { - auto user_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast<uint8_t*>(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(user_buffer); - return write_async(user_buffer.release(), user_callback); - } else { - auto dma_able_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast<uint8_t*>(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(dma_able_buffer); - return write_async(dma_able_buffer.release(), user_callback); - } + auto wrapped_callback = [buffer, user_callback](hailo_status status) { + user_callback(CompletionInfo{status, buffer.data(), buffer.size()}); + }; + return write_async(TransferRequest(buffer, wrapped_callback)); } hailo_status InputStreamBase::write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) @@ -131,38 +116,22 @@ hailo_status OutputStreamBase::read(void *buffer, size_t size) return read(MemoryView(buffer, size)); } -hailo_status OutputStreamBase::read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) -{ - CHECK_ARG_NOT_NULL(buffer); - CHECK_ARG_NOT_NULL(buffer->data()); - CHECK(buffer->size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer->size(), - get_frame_size()); - - auto wrapped_callback = [buffer, user_callback](hailo_status status) { - user_callback(CompletionInfo{status, const_cast<uint8_t*>(buffer->data()), buffer->size()}); - }; - return read_async(TransferRequest(std::move(buffer), wrapped_callback)); -} - hailo_status OutputStreamBase::read_async(MemoryView buffer, const TransferDoneCallback &user_callback) { CHECK_ARG_NOT_NULL(buffer.data()); CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer.size(), get_frame_size()); - const auto dma_able_alignment = HailoRTCommon::DMA_ABLE_ALIGNMENT_READ_HW_LIMITATION; - BufferPtr wrapped_buffer = nullptr; - if ((0 != (reinterpret_cast<size_t>(buffer.data()) % dma_able_alignment))) { - auto user_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast<uint8_t*>(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(user_buffer); - wrapped_buffer = user_buffer.release(); - } else { - auto dma_able_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast<uint8_t*>(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(dma_able_buffer); - wrapped_buffer = dma_able_buffer.release(); - } - - return read_async(wrapped_buffer, user_callback); + CHECK(!buffer.empty(), HAILO_INVALID_ARGUMENT, "Invalid buffer was passed to read_async"); + CHECK(0 == (reinterpret_cast<size_t>(buffer.data()) % HailoRTCommon::HW_DATA_ALIGNMENT), HAILO_INVALID_ARGUMENT, + "User address must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); +
CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", + buffer.size(), get_frame_size()); + + auto wrapped_callback = [buffer, user_callback](hailo_status status) { + user_callback(CompletionInfo{status, const_cast<uint8_t*>(buffer.data()), buffer.size()}); + }; + return read_async(TransferRequest(buffer, wrapped_callback)); } hailo_status OutputStreamBase::read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) diff --git a/hailort/libhailort/src/stream_common/stream_internal.hpp b/hailort/libhailort/src/stream_common/stream_internal.hpp index 7e0758da..6791267e 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.hpp +++ b/hailort/libhailort/src/stream_common/stream_internal.hpp @@ -38,7 +38,6 @@ #include "hailo/hailort_common.hpp" #include "stream_common/transfer_common.hpp" -#include "hef/hef_internal.hpp" #include "device_common/control_protocol.hpp" #include "hef/layer_info.hpp" @@ -96,7 +95,6 @@ class InputStreamBase : public InputStream virtual hailo_status write_impl(const MemoryView &buffer) = 0; - virtual hailo_status write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) override final; virtual hailo_status write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) override final; virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) override final; @@ -160,7 +158,6 @@ class OutputStreamBase : public OutputStream virtual hailo_status read_impl(MemoryView buffer) = 0; - virtual hailo_status read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) override final; virtual hailo_status read_async(MemoryView buffer, const TransferDoneCallback &user_callback) override final; virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) override final;
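With the BufferPtr overloads gone, async I/O always goes through MemoryView: the address must be HW_DATA_ALIGNMENT-aligned, the size must equal the frame size, and the wrapped callback reports the same buffer back in CompletionInfo. A hedged usage sketch (field and callback shapes follow the aggregate built above):

```cpp
// Allocate a dma-able, frame-sized buffer (satisfies the alignment CHECKs above).
auto frame = hailort::Buffer::create(input_stream.get_frame_size(),
    hailort::BufferStorageParams::create_dma());
if (!frame) { /* allocation failed */ }

auto status = input_stream.write_async(hailort::MemoryView(frame->data(), frame->size()),
    [](const hailort::InputStream::CompletionInfo &info) {
        if (HAILO_STREAM_ABORT == info.status) {
            return; // expected during deactivation/shutdown
        }
        // the buffer is reusable from here
    });
```

Note that the caller now owns the buffer's lifetime; it must stay valid until the completion callback runs.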
diff --git a/hailort/libhailort/src/stream_common/transfer_common.cpp b/hailort/libhailort/src/stream_common/transfer_common.cpp index f181ec23..a5fd6a42 100644 --- a/hailort/libhailort/src/stream_common/transfer_common.cpp +++ b/hailort/libhailort/src/stream_common/transfer_common.cpp @@ -8,41 +8,52 @@ #include "transfer_common.hpp" #include "vdma/memory/mapped_buffer.hpp" -#include "vdma/vdma_device.hpp" +#include "utils/buffer_storage.hpp" namespace hailort { TransferBuffer::TransferBuffer() : - m_base_buffer(nullptr), + m_base_buffer(MemoryView{}), m_size(0), m_offset(0) {} -TransferBuffer::TransferBuffer(BufferPtr base_buffer, size_t size, size_t offset) : - m_base_buffer(std::move(base_buffer)), +TransferBuffer::TransferBuffer(MemoryView base_buffer, size_t size, size_t offset) : + m_base_buffer(base_buffer), m_size(size), m_offset(offset) { - assert(m_size <= m_base_buffer->size()); - assert(m_offset < m_base_buffer->size()); + assert(m_size <= base_buffer.size()); + assert(m_offset < base_buffer.size()); } -TransferBuffer::TransferBuffer(BufferPtr base_buffer) - : TransferBuffer(base_buffer, base_buffer->size(), 0) +TransferBuffer::TransferBuffer(MemoryView base_buffer) + : TransferBuffer(base_buffer, base_buffer.size(), 0) {} -Expected<vdma::MappedBufferPtr> TransferBuffer::map_buffer(VdmaDevice &device, HailoRTDriver::DmaDirection direction) +Expected<vdma::MappedBufferPtr> TransferBuffer::map_buffer(HailoRTDriver &driver, HailoRTDriver::DmaDirection direction) { - CHECK_AS_EXPECTED(m_base_buffer->storage().type() == BufferStorage::Type::DMA, HAILO_INVALID_ARGUMENT, - "Buffer must be dma-able (provided buffer type {})", static_cast<int>(m_base_buffer->storage().type())); + CHECK_AS_EXPECTED(!m_mappings, HAILO_INTERNAL_FAILURE, "Buffer is already mapped"); + + vdma::DmaAbleBufferPtr dma_able_buffer; + const auto storage_key = std::make_pair(m_base_buffer.data(), m_base_buffer.size()); + if (auto storage = BufferStorageResourceManager::get_resource(storage_key)) { + auto dma_able_buffer_exp = storage->get()->get_dma_able_buffer(); + CHECK_EXPECTED(dma_able_buffer_exp); + dma_able_buffer = dma_able_buffer_exp.release(); + } else { + auto dma_able_buffer_exp = vdma::DmaAbleBuffer::create_from_user_address(m_base_buffer.data(), m_base_buffer.size()); + CHECK_EXPECTED(dma_able_buffer_exp); + dma_able_buffer = dma_able_buffer_exp.release(); + } - // Map if not already mapped - auto is_new_mapping_exp = m_base_buffer->storage().dma_map(device, to_hailo_dma_direction(direction)); - CHECK_EXPECTED(is_new_mapping_exp); + auto mapped_buffer = vdma::MappedBuffer::create_shared(std::move(dma_able_buffer), driver, direction); + CHECK_EXPECTED(mapped_buffer); - return m_base_buffer->storage().get_dma_mapped_buffer(device.get_dev_id()); + m_mappings = mapped_buffer.value(); + return mapped_buffer; } hailo_status TransferBuffer::copy_to(MemoryView buffer) @@ -72,52 +83,24 @@ hailo_status TransferBuffer::copy_from(const MemoryView buffer) return HAILO_SUCCESS; } -hailo_status TransferBuffer::synchronize(VdmaDevice &device, HailoRTDriver::DmaSyncDirection sync_direction) -{ - auto mapped_buffer = m_base_buffer->storage().get_dma_mapped_buffer(device.get_dev_id()); - CHECK_EXPECTED_AS_STATUS(mapped_buffer); - - auto continuous_parts = get_continuous_parts(); - - auto status = synchronize_part(*mapped_buffer, continuous_parts.first, sync_direction); - CHECK_SUCCESS(status); - - if (!continuous_parts.second.empty()) { - status = synchronize_part(*mapped_buffer, continuous_parts.second, sync_direction); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status TransferBuffer::synchronize_part(vdma::MappedBufferPtr &mapped_buffer, MemoryView continuous_part, - HailoRTDriver::DmaSyncDirection sync_direction) -{ - assert(!continuous_part.empty()); - assert(continuous_part.data() >= m_base_buffer->data()); - - return mapped_buffer->synchronize(continuous_part.data() - m_base_buffer->data(), continuous_part.size(), - sync_direction); -} - bool TransferBuffer::is_wrap_around() const { - return (m_offset + m_size) > m_base_buffer->size(); + return (m_offset + m_size) > m_base_buffer.size(); } std::pair<MemoryView, MemoryView> TransferBuffer::get_continuous_parts() { if (is_wrap_around()) { - const auto size_to_end = m_base_buffer->size() - m_offset; + const auto size_to_end = m_base_buffer.size() - m_offset; assert(size_to_end < m_size); return std::make_pair( - MemoryView(m_base_buffer->data() + m_offset, size_to_end), - MemoryView(m_base_buffer->data(), m_size - size_to_end) + MemoryView(m_base_buffer.data() + m_offset, size_to_end), + MemoryView(m_base_buffer.data(), m_size - size_to_end) ); } else { return std::make_pair( - MemoryView(m_base_buffer->data() + m_offset, m_size), + MemoryView(m_base_buffer.data() + m_offset, m_size), MemoryView() ); } diff --git a/hailort/libhailort/src/stream_common/transfer_common.hpp b/hailort/libhailort/src/stream_common/transfer_common.hpp index 795b4586..66aaf404 100644 --- a/hailort/libhailort/src/stream_common/transfer_common.hpp +++ b/hailort/libhailort/src/stream_common/transfer_common.hpp @@ -13,40 +13,33 @@ #include "hailo/stream.hpp" #include "hailo/buffer.hpp" -#include "os/hailort_driver.hpp"
+#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/mapped_buffer.hpp" namespace hailort { -class VdmaDevice; - // Contains buffer that can be transferred. The buffer can be circular - // It relies at [m_offset, m_base_buffer.size()) and [0, m_base_buffer.size() - m_size). class TransferBuffer final { public: TransferBuffer(); - TransferBuffer(BufferPtr base_buffer); - TransferBuffer(BufferPtr base_buffer, size_t size, size_t offset); - BufferPtr base_buffer() { return m_base_buffer; } + TransferBuffer(MemoryView base_buffer); + TransferBuffer(MemoryView base_buffer, size_t size, size_t offset); + + MemoryView base_buffer() { return m_base_buffer; } size_t offset() const { return m_offset; } size_t size() const { return m_size; } - Expected map_buffer(VdmaDevice &device, HailoRTDriver::DmaDirection direction); + Expected map_buffer(HailoRTDriver &driver, HailoRTDriver::DmaDirection direction); hailo_status copy_to(MemoryView buffer); hailo_status copy_from(const MemoryView buffer); - // Sync the buffer to the given direction, fails if the buffer is not mapped. - hailo_status synchronize(VdmaDevice &device, HailoRTDriver::DmaSyncDirection sync_direction); - private: - // Sync a signal continuous part - hailo_status synchronize_part(vdma::MappedBufferPtr &mapped_buffer, MemoryView continuous_part, - HailoRTDriver::DmaSyncDirection sync_direction); - bool is_wrap_around() const; // Returns the continuous parts of the buffer. @@ -57,9 +50,12 @@ class TransferBuffer final { // 2. If the buffer is not circular, the first part will contain the buffer, the second will point to nullptr. std::pair get_continuous_parts(); - BufferPtr m_base_buffer; + MemoryView m_base_buffer; size_t m_size; size_t m_offset; + + // Once map_buffer is called, a MappedBuffer object is stored here to make sure the buffer is mapped. + vdma::MappedBufferPtr m_mappings; }; // Internal function, wrapper to the user callbacks, accepts the callback status as an argument. 
diff --git a/hailort/libhailort/src/transform/transform.cpp b/hailort/libhailort/src/transform/transform.cpp index b977c466..8a82aaec 100644 --- a/hailort/libhailort/src/transform/transform.cpp +++ b/hailort/libhailort/src/transform/transform.cpp @@ -156,6 +156,16 @@ std::string TransformContextUtils::make_reorder_description(hailo_format_order_t return reorder_description.str(); } +std::string TransformContextUtils::make_pad_periph_description(hailo_3d_image_shape_t src_shape, hailo_3d_image_shape_t dst_shape) +{ + std::stringstream reorder_description; + reorder_description << "Padding Periph shape - src_shape: (" << src_shape.height << ", " << src_shape.width << ", " + << src_shape.features << "), dst_shape: (" << dst_shape.height << ", " << dst_shape.width << ", " + << dst_shape.features << ")"; + + return reorder_description.str(); +} + std::string TransformContextUtils::make_transpose_description(hailo_3d_image_shape_t src_shape, hailo_3d_image_shape_t transposed_shape) { std::stringstream transpose_description; @@ -1031,8 +1041,6 @@ hailo_status reorder_input_stream(const void *src_ptr, hailo_3d_image_shape_t sr if (((HAILO_FORMAT_ORDER_FCR == src_format.order) || (HAILO_FORMAT_ORDER_NHWC == src_format.order)) && (HAILO_FORMAT_ORDER_FCR == dst_format.order)) { - //Check that there is alignment for 8 bytes - assert(0 == ((HailoRTCommon::get_data_bytes(dst_format.type) * dst_image_shape.features) % HailoRTCommon::HW_DATA_ALIGNMENT)); switch (dst_format.type) { case HAILO_FORMAT_TYPE_UINT8: transform__h2d_FCR((uint8_t*)src_ptr, &src_image_shape, (uint8_t*)dst_ptr, &dst_image_shape); @@ -1536,7 +1544,7 @@ hailo_status transform_demux_raw_frame(const void *src, uint32_t offset, } hailo_status validate_input_transform_params(hailo_3d_image_shape_t src_image_shape, hailo_format_t src_format, - hailo_3d_image_shape_t dst_image_shape, hailo_format_t dst_format) + hailo_format_t dst_format) { /* Check device type */ if (!((HAILO_FORMAT_TYPE_UINT16 == dst_format.type) || (HAILO_FORMAT_TYPE_UINT8 == dst_format.type))) { @@ -1545,15 +1553,7 @@ hailo_status validate_input_transform_params(hailo_3d_image_shape_t src_image_sh } /* Check reorder flags - where no reorder is needed */ - if ((HAILO_FORMAT_ORDER_FCR == src_format.order) && - (HAILO_FORMAT_ORDER_FCR == dst_format.order)) { - //Check that there is alignment for 8 bytes - if (0 != ((HailoRTCommon::get_data_bytes(dst_format.type) * dst_image_shape.features) % HailoRTCommon::HW_DATA_ALIGNMENT)) { - LOGGER__ERROR("HW features must be aligned to {}. passed hw features - {}", - HailoRTCommon::HW_DATA_ALIGNMENT, dst_image_shape.features); - return HAILO_INVALID_ARGUMENT; - } - } else if ((HAILO_FORMAT_ORDER_BAYER_RGB == src_format.order) && + if ((HAILO_FORMAT_ORDER_BAYER_RGB == src_format.order) && (HAILO_FORMAT_ORDER_BAYER_RGB == dst_format.order)) { if (src_image_shape.features != 1) { LOGGER__ERROR("Invalid Bayer user features. Expected 1, received {}", src_image_shape.features); @@ -1565,11 +1565,6 @@ hailo_status validate_input_transform_params(hailo_3d_image_shape_t src_image_sh LOGGER__ERROR("Invalid Bayer user features. 
Expected 1, received {}", src_image_shape.features); return HAILO_INVALID_ARGUMENT; } - } else if ((HAILO_FORMAT_ORDER_YUY2 == src_format.order) && - (HAILO_FORMAT_ORDER_YUY2 == dst_format.order)) { - auto shape_size_in_bytes = HailoRTCommon::get_shape_size(src_image_shape) * HailoRTCommon::get_data_bytes(src_format.type); - CHECK(shape_size_in_bytes % HailoRTCommon::HW_DATA_ALIGNMENT == 0, HAILO_INVALID_ARGUMENT, - "YUY2_to_YUY2 Transform shape_size must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); } return HAILO_SUCCESS; @@ -1654,6 +1649,15 @@ std::string InputTransformContext::description() const transform_description << TransformContextUtils::make_reorder_description(m_src_format.order, m_src_image_shape, m_dst_format.order, m_dst_image_shape); } + if (m_should_pad_periph) { + if (!first) { + transform_description << " | "; + } else { + first = false; + } + transform_description << TransformContextUtils::make_pad_periph_description(m_src_image_shape, m_dst_image_shape); + } + return transform_description.str(); } @@ -1661,7 +1665,7 @@ Expected<std::unique_ptr<InputTransformContext>> InputTransformContext::create(c onst hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector<hailo_quant_info_t> &dst_quant_infos) { - auto status = validate_input_transform_params(src_image_shape, src_format, dst_image_shape, dst_format); + auto status = validate_input_transform_params(src_image_shape, src_format, dst_format); CHECK_SUCCESS_AS_EXPECTED(status); const auto internal_src_format = HailoRTDefaults::expand_auto_format(src_format, dst_format); @@ -2077,6 +2081,15 @@ std::string FrameOutputTransformContext::description() const transform_description << TransformContextUtils::make_reorder_description(m_src_format.order, m_src_image_shape, m_dst_format.order, m_dst_image_shape); } + if (m_should_pad_periph) { + if (!first) { + transform_description << " | "; + } else { + first = false; + } + transform_description << TransformContextUtils::make_pad_periph_description(m_src_image_shape, m_dst_image_shape); + } + return transform_description.str(); } @@ -2192,8 +2205,9 @@ hailo_status fuse_buffers(const std::vector<MemoryView> &buffers, HailoRTCommon::get_nms_hw_frame_size(info)); } - // We keep the size of the dst buffer 1 bbox_size too big to stay in the format of not defused nms frames. - total_size_of_buffers += frames[0].first->bbox_size; + // We keep the dst buffer one burst (bbox_size * burst_size) too big, to stay in the format of non-defused nms frames. + const auto burst_size = (frames[0].first->bbox_size * frames[0].first->burst_size); + total_size_of_buffers += burst_size; CHECK(dst.size() == total_size_of_buffers, HAILO_INVALID_ARGUMENT, "Size of destination buffer is not same as the expected size of the fused frame! (size: {}, expected: {})",
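The fused NMS destination is now sized as the sum of the per-frame hw frame sizes plus one extra burst (bbox_size * burst_size) rather than a single bbox, keeping the fused output in the same layout as a non-defused frame. A hedged sketch of the size computation (helper name illustrative; assumes a non-empty infos vector):

```cpp
// Expected destination size for fusing the given defused NMS frames.
size_t expected_fused_size(const std::vector<hailo_nms_info_t> &infos)
{
    size_t total = 0;
    for (const auto &info : infos) {
        total += HailoRTCommon::get_nms_hw_frame_size(info);
    }
    // one extra burst keeps the layout of a non-defused nms frame
    return total + (infos[0].bbox_size * infos[0].burst_size);
}
```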
diff --git a/hailort/libhailort/src/transform/transform_internal.hpp b/hailort/libhailort/src/transform/transform_internal.hpp index 3f254a62..8cbc115f 100644 --- a/hailort/libhailort/src/transform/transform_internal.hpp +++ b/hailort/libhailort/src/transform/transform_internal.hpp @@ -46,6 +46,7 @@ class HAILORTAPI TransformContextUtils final static std::string make_reorder_description(hailo_format_order_t src_order, hailo_3d_image_shape_t src_shape, hailo_format_order_t dst_order, hailo_3d_image_shape_t dst_shape); static std::string make_transpose_description(hailo_3d_image_shape_t original_shape, hailo_3d_image_shape_t transposed_shape); + static std::string make_pad_periph_description(hailo_3d_image_shape_t src_shape, hailo_3d_image_shape_t dst_shape); template <typename T> static hailo_status transform__d2h_NHCW_to_NCHW( diff --git a/hailort/libhailort/src/utils/CMakeLists.txt b/hailort/libhailort/src/utils/CMakeLists.txt index 066e16e5..57d45d50 100644 --- a/hailort/libhailort/src/utils/CMakeLists.txt +++ b/hailort/libhailort/src/utils/CMakeLists.txt @@ -8,6 +8,7 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sensor_config_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/soc_utils/partial_cluster_reader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/measurement_utils.cpp ) if(HAILO_BUILD_PROFILER) diff --git a/hailort/libhailort/src/utils/buffer.cpp b/hailort/libhailort/src/utils/buffer.cpp index 6283fb55..054c24fd 100644 --- a/hailort/libhailort/src/utils/buffer.cpp +++ b/hailort/libhailort/src/utils/buffer.cpp @@ -10,6 +10,8 @@ **/ #include "hailo/buffer.hpp" +#include "utils/buffer_storage.hpp" +#include "utils/exported_resource_manager.hpp" #include "common/logger_macros.hpp" #include "common/utils.hpp" #include "common/string_utils.hpp" @@ -39,20 +41,37 @@ static void format_buffer(std::ostream& stream, const uint8_t *buffer, size_t si } } +class Buffer::StorageImpl final { +public: + StorageImpl(BufferStoragePtr storage, std::unique_ptr<BufferStorageRegisteredResource> storage_resource) : + m_storage(std::move(storage)), + m_storage_resource(std::move(storage_resource)) + {} + + BufferStoragePtr m_storage; + + // Optionally we register the resource. By default the resource is registered to the manager, but in some cases + // (for example, the unit tests) we want to skip the registration. + std::unique_ptr<BufferStorageRegisteredResource> m_storage_resource; +}; + Buffer::Buffer() : - m_storage(), + m_storage_impl(), m_data(nullptr), m_size(0) {} -Buffer::Buffer(BufferStoragePtr storage) : - m_storage(storage), - m_data(static_cast<uint8_t*>(m_storage->user_address())), - m_size(m_storage->size()) +// Defined in the cpp file, since the StorageImpl definition is needed.
+Buffer::~Buffer() = default; + +Buffer::Buffer(std::unique_ptr<StorageImpl> storage) : + m_storage_impl(std::move(storage)), + m_data(static_cast<uint8_t*>(m_storage_impl->m_storage->user_address())), + m_size(m_storage_impl->m_storage->size()) {} Buffer::Buffer(Buffer&& other) : - m_storage(std::move(other.m_storage)), + m_storage_impl(std::move(other.m_storage_impl)), m_data(std::exchange(other.m_data, nullptr)), m_size(std::exchange(other.m_size, 0)) {} @@ -62,7 +81,7 @@ Expected<Buffer> Buffer::create(size_t size, const BufferStorageParams &params) auto storage = BufferStorage::create(size, params); CHECK_EXPECTED(storage); - return Buffer(storage.release()); + return create(storage.release()); } Expected<Buffer> Buffer::create(size_t size, uint8_t default_value, const BufferStorageParams &params) @@ -121,6 +140,24 @@ Expected<Buffer> Buffer::create(std::initializer_list<uint8_t> init, const Buffe return buffer; } +Expected<Buffer> Buffer::create(BufferStoragePtr storage, bool register_storage /* = true */) +{ + // If needed, register the storage + std::unique_ptr<BufferStorageRegisteredResource> optional_registered_resource; + if (register_storage) { + const auto storage_key = std::make_pair(storage->user_address(), storage->size()); + auto registered_resource = BufferStorageRegisteredResource::create(storage, storage_key); + CHECK_EXPECTED(registered_resource); + optional_registered_resource = make_unique_nothrow<BufferStorageRegisteredResource>(registered_resource.release()); + CHECK_NOT_NULL(optional_registered_resource, HAILO_OUT_OF_HOST_MEMORY); + } + + auto storage_impl = make_unique_nothrow<StorageImpl>(std::move(storage), std::move(optional_registered_resource)); + CHECK_NOT_NULL(storage_impl, HAILO_OUT_OF_HOST_MEMORY); + + return Buffer(std::move(storage_impl)); +} + Expected<Buffer> Buffer::copy() const { return Buffer::create(m_data, m_size); @@ -128,7 +165,7 @@ Expected<Buffer> Buffer::copy() const Buffer& Buffer::operator=(Buffer&& other) { - m_storage = std::move(other.m_storage); + m_storage_impl = std::move(other.m_storage_impl); m_data = std::exchange(other.m_data, nullptr); m_size = std::exchange(other.m_size, 0); return *this; @@ -174,7 +211,8 @@ Buffer::iterator Buffer::end() BufferStorage &Buffer::storage() { - return *m_storage; + assert(m_storage_impl); + return *m_storage_impl->m_storage; } uint8_t* Buffer::data() noexcept @@ -241,6 +279,11 @@ uint64_t& Buffer::as_uint64() return as_type<uint64_t>(); } +Expected<void *> Buffer::release() noexcept +{ + return m_storage_impl->m_storage->release(); +} + MemoryView::MemoryView() : m_data(nullptr), m_size(0)
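Buffer::create now registers the storage with BufferStorageResourceManager under an (address, size) key, which is what lets TransferBuffer::map_buffer recover the dma-able buffer behind a plain MemoryView later on. A hedged sketch of the lookup side (mirrors the code in transfer_common.cpp above; user_buffer is an assumed MemoryView):

```cpp
// Look up a registered storage by the user-visible address/size pair.
const auto key = std::make_pair(user_buffer.data(), user_buffer.size());
if (auto storage = hailort::BufferStorageResourceManager::get_resource(key)) {
    // Hit: reuse the existing dma-able buffer instead of re-pinning user memory.
    auto dma_able = storage->get()->get_dma_able_buffer();
} else {
    // Miss: the memory did not come from a registered Buffer; pin it directly.
}
```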
diff --git a/hailort/libhailort/src/utils/buffer_storage.cpp b/hailort/libhailort/src/utils/buffer_storage.cpp index fbdde3d3..877ab7a6 100644 --- a/hailort/libhailort/src/utils/buffer_storage.cpp +++ b/hailort/libhailort/src/utils/buffer_storage.cpp @@ -7,7 +7,7 @@ * @brief TODO: fill me (HRT-10026) **/ -#include "hailo/buffer_storage.hpp" +#include "buffer_storage.hpp" #include "hailo/hailort.h" #include "hailo/vdevice.hpp" #include "vdma/vdma_device.hpp" @@ -26,88 +26,16 @@ static_assert(HAILO_DMA_BUFFER_DIRECTION_D2H == (int)HailoRTDriver::DmaDirection static_assert(HAILO_DMA_BUFFER_DIRECTION_BOTH == (int)HailoRTDriver::DmaDirection::BOTH, "hailo_dma_buffer_direction_t must match HailoRTDriver::DmaDirection"); -BufferStorageParams::HeapParams::HeapParams() -{} - -Expected<BufferStorageParams::DmaMappingParams> BufferStorageParams::DmaMappingParams::create( - const hailo_buffer_dma_mapping_params_t &params) -{ - CHECK_AS_EXPECTED((params.device == nullptr) || (params.vdevice == nullptr), HAILO_INVALID_ARGUMENT, - "Can't set both device and vdevice fields"); - return DmaMappingParams(params); -} -BufferStorageParams::DmaMappingParams::DmaMappingParams(const hailo_buffer_dma_mapping_params_t &params) : - device(reinterpret_cast<Device*>(params.device)), - vdevice(reinterpret_cast<VDevice*>(params.vdevice)), - data_direction(params.direction) -{} - -BufferStorageParams::DmaMappingParams::DmaMappingParams(Device &device, hailo_dma_buffer_direction_t data_direction) : - device(&device), - vdevice(nullptr), - data_direction(data_direction) -{} - -BufferStorageParams::DmaMappingParams::DmaMappingParams(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction) : - device(nullptr), - vdevice(&vdevice), - data_direction(data_direction) -{} - -BufferStorageParams::DmaMappingParams::DmaMappingParams() : - device(nullptr), - vdevice(nullptr), - data_direction(HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM) -{} - -Expected<BufferStorageParams> BufferStorageParams::create(const hailo_buffer_parameters_t &params) -{ - BufferStorageParams result{}; - result.flags = params.flags; - - if (params.flags == HAILO_BUFFER_FLAGS_NONE) { - result.heap_params = HeapParams(); - } else if ((params.flags & HAILO_BUFFER_FLAGS_DMA) != 0) { - auto dma_mapping_params = DmaMappingParams::create(params.dma_mapping_params); - CHECK_EXPECTED(dma_mapping_params); - result.dma_mapping_params = dma_mapping_params.release(); - } else { - // TODO: HRT-10903 - LOGGER__ERROR("Buffer storage flags not currently supported {}", params.flags); - return make_unexpected(HAILO_NOT_IMPLEMENTED); - } - - return result; -} BufferStorageParams BufferStorageParams::create_dma() { BufferStorageParams result{}; result.flags = HAILO_BUFFER_FLAGS_DMA; - result.dma_mapping_params = DmaMappingParams(); - return result; -} - -BufferStorageParams BufferStorageParams::create_dma(Device &device, hailo_dma_buffer_direction_t data_direction) : -{ - BufferStorageParams result{}; - result.flags = HAILO_BUFFER_FLAGS_DMA; - result.dma_mapping_params = DmaMappingParams(device, data_direction); - return result; -} - -BufferStorageParams BufferStorageParams::create_dma(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction) -{ - BufferStorageParams result{}; - result.flags = HAILO_BUFFER_FLAGS_DMA; - result.dma_mapping_params = DmaMappingParams(vdevice, data_direction); return result; } BufferStorageParams::BufferStorageParams() : - flags(HAILO_BUFFER_FLAGS_NONE), - heap_params() + flags(HAILO_BUFFER_FLAGS_NONE) {} Expected<BufferStoragePtr> BufferStorage::create(size_t size, const BufferStorageParams &params) @@ -117,29 +45,9 @@ Expected<BufferStoragePtr> BufferStorage::create(size_t size, const BufferStorag CHECK_EXPECTED(result); return std::static_pointer_cast<BufferStorage>(result.release()); } else if (0 != (params.flags & HAILO_BUFFER_FLAGS_DMA)) { - // TODO: check other flags here (HRT-10903) - auto &dma_mapping_params = params.dma_mapping_params; - - DmaStoragePtr storage = nullptr; - if ((dma_mapping_params.device != nullptr) && (dma_mapping_params.vdevice != nullptr)) { - LOGGER__ERROR("Can't map a buffer to both vdevice and device"); - return make_unexpected(HAILO_INVALID_ARGUMENT); - } else if (dma_mapping_params.device != nullptr) { - auto result = DmaStorage::create(size, dma_mapping_params.data_direction, - *dma_mapping_params.device); - CHECK_EXPECTED(result); - storage = result.release(); - } else if (dma_mapping_params.vdevice != nullptr) { - auto result = DmaStorage::create(size, dma_mapping_params.data_direction, - *dma_mapping_params.vdevice); - CHECK_EXPECTED(result); - storage = result.release(); - } else { - auto result = DmaStorage::create(size); - CHECK_EXPECTED(result); - storage = result.release(); - } - return
std::static_pointer_cast(storage); + auto result = DmaStorage::create(size); + CHECK_EXPECTED(result); + return std::static_pointer_cast(result.release()); } // TODO: HRT-10903 @@ -147,13 +55,9 @@ Expected BufferStorage::create(size_t size, const BufferStorag return make_unexpected(HAILO_NOT_IMPLEMENTED); } -BufferStorage::BufferStorage(Type type) : - m_type(type) -{} - -BufferStorage::Type BufferStorage::type() const +Expected BufferStorage::get_dma_able_buffer() { - return m_type; + return make_unexpected(HAILO_NOT_IMPLEMENTED); } Expected HeapStorage::create(size_t size) @@ -168,7 +72,6 @@ Expected HeapStorage::create(size_t size) } HeapStorage::HeapStorage(std::unique_ptr data, size_t size) : - BufferStorage(Type::HEAP), m_data(std::move(data)), m_size(size) {} @@ -195,127 +98,21 @@ Expected HeapStorage::release() noexcept return m_data.release(); } -Expected HeapStorage::dma_map(Device &, hailo_dma_buffer_direction_t) -{ - LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA"); - return make_unexpected(HAILO_INVALID_OPERATION); -} - -Expected HeapStorage::dma_map(VdmaDevice &, hailo_dma_buffer_direction_t) -{ - LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA"); - return make_unexpected(HAILO_INVALID_OPERATION); -} - -Expected HeapStorage::get_dma_mapped_buffer(const std::string &) -{ - LOGGER__ERROR("Mapped buffer is not supported for Heap allocated buffers"); - return make_unexpected(HAILO_INVALID_OPERATION); -} Expected DmaStorage::create(size_t size) { - static const auto ALLOCATE_BUFFER = nullptr; - return create(ALLOCATE_BUFFER, size); -} - -Expected DmaStorage::create(size_t size, - hailo_dma_buffer_direction_t data_direction, Device &device) -{ - static const auto ALLOCATE_BUFFER = nullptr; - return create(ALLOCATE_BUFFER, size, data_direction, - std::vector>{std::ref(device)}); -} - -Expected DmaStorage::create(size_t size, - hailo_dma_buffer_direction_t data_direction, VDevice &vdevice) -{ - static const auto ALLOCATE_BUFFER = nullptr; - auto physical_devices = vdevice.get_physical_devices(); - CHECK_EXPECTED(physical_devices); - return create(ALLOCATE_BUFFER, size, data_direction, physical_devices.release()); -} - -Expected DmaStorage::create_from_user_address(void *user_address, size_t size) -{ - return create(user_address, size); -} - -Expected DmaStorage::create_from_user_address(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, Device &device) -{ - CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address); - return create(user_address, size, data_direction, - std::vector>{std::ref(device)}); -} - -Expected DmaStorage::create_from_user_address(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, VDevice &vdevice) -{ - CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address); - auto physical_devices = vdevice.get_physical_devices(); - CHECK_EXPECTED(physical_devices); - return create(user_address, size, data_direction, physical_devices.release()); -} - -Expected> DmaStorage::create_dma_able_buffer_from_user_size(void *addr, size_t size) -{ - auto storage = create_from_user_address(addr, size); - CHECK_EXPECTED(storage); + // TODO: HRT-10283 support sharing low memory buffers for DART and similar systems. 
+ TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_by_allocation(size)); - auto buffer = make_shared_nothrow(storage.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); - - return buffer; -} - -Expected DmaStorage::create(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, - std::vector> &&physical_devices) -{ - vdma::DmaAbleBufferPtr dma_able_buffer_ptr = nullptr; - if (nullptr == user_address) { - // TODO: HRT-10283 support sharing low memory buffers for DART and similar systems. - auto dma_able_buffer = vdma::DmaAbleBuffer::create_by_allocation(size); - CHECK_EXPECTED(dma_able_buffer); - dma_able_buffer_ptr = dma_able_buffer.release(); - } else { - auto dma_able_buffer = vdma::DmaAbleBuffer::create_from_user_address(user_address, size); - CHECK_EXPECTED(dma_able_buffer); - dma_able_buffer_ptr = dma_able_buffer.release(); - } - - auto result = make_shared_nothrow(std::move(dma_able_buffer_ptr)); + auto result = make_shared_nothrow(std::move(dma_able_buffer)); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - - for (auto &device : physical_devices) { - auto is_new_mapping = result->dma_map(device, data_direction); - CHECK_EXPECTED(is_new_mapping); - CHECK_AS_EXPECTED(is_new_mapping.value(), HAILO_INTERNAL_FAILURE); - } - return result; } DmaStorage::DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer) : - BufferStorage(Type::DMA), - m_dma_able_buffer(std::move(dma_able_buffer)), - m_mappings() + m_dma_able_buffer(std::move(dma_able_buffer)) {} -DmaStorage::~DmaStorage() -{ - // TODO: deleter callback holds a reference to a device, which is bad since this BufferStorage could outlive - // the device. We need to doc that it isn't allowed. Later on, I think devices should use shared_ptrs - // and then the mapping will inc the reference count (HRT-12361) - for (const auto &device_mapping_pair : m_mappings) { - const auto &mapping = device_mapping_pair.second; - if (nullptr != mapping.second) { - mapping.second(); - } - } -} - size_t DmaStorage::size() const { return m_dma_able_buffer->size(); @@ -331,115 +128,9 @@ Expected DmaStorage::release() noexcept return make_unexpected(HAILO_NOT_IMPLEMENTED); } -Expected DmaStorage::dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) -{ - const auto device_type = device.get_type(); - CHECK_AS_EXPECTED(((Device::Type::INTEGRATED == device_type) || (Device::Type::PCIE == device_type)), - HAILO_INVALID_ARGUMENT, "Invalid device type (expected integrated/pcie, received {})", device_type); - return dma_map(*reinterpret_cast(&device), data_direction); -} - -// TODO: change data_direction to hailo_stream_direction_t (HRT-12391) -Expected DmaStorage::dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) -{ - CHECK_AS_EXPECTED(data_direction <= HAILO_DMA_BUFFER_DIRECTION_BOTH, HAILO_INVALID_ARGUMENT, - "Invalid data direction {}", data_direction); - - const auto device_id = device.get_dev_id(); - auto find_result = m_mappings.find(device_id); - if (find_result != m_mappings.end()) { - // The buffer has been mapped in this object => don't map it again - return Expected(false); // not a new mapping - } - - const auto direction = (data_direction == HAILO_DMA_BUFFER_DIRECTION_H2D) ? 
HAILO_H2D_STREAM : HAILO_D2H_STREAM; - - auto mapping_result = device.try_dma_map(m_dma_able_buffer, direction); - CHECK_EXPECTED(mapping_result); - - const auto is_new_mapping = mapping_result->second; - if (is_new_mapping) { - const auto deleter = [&device, address = m_dma_able_buffer->user_address(), direction]() { - // Best effort - auto status = device.dma_unmap(address, direction); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to un-map buffer {} from device {} in direction {}", - address, device.get_dev_id(), direction); - } - }; - m_mappings.emplace(device_id, std::make_pair(mapping_result->first, deleter)); - } else { - m_mappings.emplace(device_id, std::make_pair(mapping_result->first, nullptr)); - } - return Expected(is_new_mapping); -} - -Expected DmaStorage::get_dma_mapped_buffer(const std::string &device_id) -{ - auto mapped_buffer = m_mappings.find(device_id); - if (mapped_buffer == m_mappings.end()) { - // Don't print error message here - LOGGER__INFO("Mapped buffer for {} not found", device_id); - return make_unexpected(HAILO_NOT_FOUND); - } - - return Expected(mapped_buffer->second.first); -} - -Expected UserBufferStorage::create(void *user_address, const size_t size) -{ - auto result = make_shared_nothrow(user_address, size); - CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - - return result; -} - -UserBufferStorage::UserBufferStorage(void * user_address, const size_t size) : - BufferStorage(Type::USER_BUFFER), - m_user_address(user_address), - m_size(size) -{} - -size_t UserBufferStorage::size() const -{ - return m_size; -} - -void *UserBufferStorage::user_address() -{ - return const_cast(m_user_address); -} - -Expected UserBufferStorage::release() noexcept -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -Expected UserBufferStorage::dma_map(Device &/* device */, hailo_dma_buffer_direction_t /* data_direction */) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -// TODO: change data_direction to hailo_stream_direction_t (HRT-12391) -Expected UserBufferStorage::dma_map(VdmaDevice &/* device */, hailo_dma_buffer_direction_t /* data_direction */) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -Expected UserBufferStorage::get_dma_mapped_buffer(const std::string &/* device_id */) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -Expected> UserBufferStorage::create_storage_from_user_buffer(void *addr, size_t size) +Expected DmaStorage::get_dma_able_buffer() { - auto storage = UserBufferStorage::create(addr, size); - CHECK_EXPECTED(storage); - - auto buffer = make_shared_nothrow(storage.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); - - return buffer; + return vdma::DmaAbleBufferPtr{m_dma_able_buffer}; } } /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/buffer_storage.hpp b/hailort/libhailort/src/utils/buffer_storage.hpp new file mode 100644 index 00000000..b277c983 --- /dev/null +++ b/hailort/libhailort/src/utils/buffer_storage.hpp @@ -0,0 +1,149 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file buffer_storage.hpp + * @brief Contains the internal storage object for the Buffer object. 
+ **/ + +#ifndef _HAILO_BUFFER_STORAGE_HPP_ +#define _HAILO_BUFFER_STORAGE_HPP_ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "hailo/buffer.hpp" + +#include "utils/exported_resource_manager.hpp" + +#include +#include +#include +#include +#include +#include + + +/** hailort namespace */ +namespace hailort +{ + +// Forward declarations +class Device; +class VDevice; +class VdmaDevice; +class BufferStorage; +class HeapStorage; +class DmaStorage; +class HailoRTDriver; +class Buffer; + +namespace vdma { + class DmaAbleBuffer; + using DmaAbleBufferPtr = std::shared_ptr<DmaAbleBuffer>; + + class MappedBuffer; + using MappedBufferPtr = std::shared_ptr<MappedBuffer>; +} + + +using BufferStoragePtr = std::shared_ptr<BufferStorage>; + +// The key is a (void *, size) pair. std::pair is not hashable by default, so BufferStorageKeyHash below provides the hash. +using BufferStorageKey = std::pair<void *, size_t>; + +struct BufferStorageKeyHash { + size_t operator()(const BufferStorageKey &key) const noexcept + { + return std::hash<void *>()(key.first) ^ std::hash<size_t>()(key.second); + } +}; + +using BufferStorageResourceManager = ExportedResourceManager<BufferStoragePtr, BufferStorageKey, BufferStorageKeyHash>; +using BufferStorageRegisteredResource = RegisteredResource<BufferStoragePtr, BufferStorageKey, BufferStorageKeyHash>; + +class BufferStorage +{ +public: + + static Expected<BufferStoragePtr> create(size_t size, const BufferStorageParams &params); + + BufferStorage(BufferStorage&& other) noexcept = default; + BufferStorage(const BufferStorage &) = delete; + BufferStorage &operator=(BufferStorage &&) = delete; + BufferStorage &operator=(const BufferStorage &) = delete; + virtual ~BufferStorage() = default; + + virtual size_t size() const = 0; + virtual void *user_address() = 0; + // Returns the pointer managed by this object and releases ownership + // TODO: Add a free function pointer? (HRT-10024) + // // Free the returned pointer with `delete` + // TODO: after release the containing buffer will hold pointers to values that were released. + // Document that this can happen? Disable this behavior somehow? (HRT-10024) + virtual Expected<void *> release() noexcept = 0; + + // Internal functions + virtual Expected<vdma::DmaAbleBufferPtr> get_dma_able_buffer(); + + BufferStorage() = default; +}; + +using HeapStoragePtr = std::shared_ptr<HeapStorage>; + +/** + * Most basic storage for buffer - regular heap allocation. + */ +class HeapStorage : public BufferStorage +{ +public: + static Expected<HeapStoragePtr> create(size_t size); + HeapStorage(std::unique_ptr<uint8_t[]> data, size_t size); + HeapStorage(HeapStorage&& other) noexcept; + HeapStorage(const HeapStorage &) = delete; + HeapStorage &operator=(HeapStorage &&) = delete; + HeapStorage &operator=(const HeapStorage &) = delete; + virtual ~HeapStorage() = default; + + virtual size_t size() const override; + virtual void *user_address() override; + virtual Expected<void *> release() noexcept override; + +private: + std::unique_ptr<uint8_t[]> m_data; + size_t m_size; +}; + +using DmaStoragePtr = std::shared_ptr<DmaStorage>; + +/** + * Storage class for buffer that can be directly mapped to a device/vdevice for dma. + */ +class DmaStorage : public BufferStorage +{ +public: + // Creates a DmaStorage instance holding a dma-able buffer size bytes large.
+    static Expected<DmaStoragePtr> create(size_t size);
+
+    DmaStorage(const DmaStorage &other) = delete;
+    DmaStorage &operator=(const DmaStorage &other) = delete;
+    DmaStorage(DmaStorage &&other) noexcept = default;
+    DmaStorage &operator=(DmaStorage &&other) = delete;
+    virtual ~DmaStorage() = default;
+
+    virtual size_t size() const override;
+    virtual void *user_address() override;
+    virtual Expected<void *> release() noexcept override;
+
+    // Internal functions
+    DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer);
+    virtual Expected<vdma::DmaAbleBufferPtr> get_dma_able_buffer() override;
+
+private:
+    vdma::DmaAbleBufferPtr m_dma_able_buffer;
+};
+
+
+} /* namespace hailort */
+
+#endif /* _HAILO_BUFFER_STORAGE_HPP_ */
diff --git a/hailort/libhailort/src/utils/dma_buffer_utils.hpp b/hailort/libhailort/src/utils/dma_buffer_utils.hpp
new file mode 100644
index 00000000..7ed80192
--- /dev/null
+++ b/hailort/libhailort/src/utils/dma_buffer_utils.hpp
@@ -0,0 +1,37 @@
+/**
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file dma_buffer_utils.hpp
+ * @brief A module for managing DMA buffers
+ **/
+
+#ifndef _HAILO_DMA_BUFFER_UTILS_HPP_
+#define _HAILO_DMA_BUFFER_UTILS_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+#include "utils/buffer_storage.hpp"
+
+/** hailort namespace */
+namespace hailort
+{
+
+class HAILORTAPI DmaBufferUtils
+{
+public:
+
+    static Expected<MemoryView> mmap_dma_buffer_write(hailo_dma_buffer_t dma_buffer);
+
+    static hailo_status munmap_dma_buffer_write(hailo_dma_buffer_t dma_buffer, MemoryView dma_buffer_memview);
+
+    static Expected<MemoryView> mmap_dma_buffer_read(hailo_dma_buffer_t dma_buffer);
+
+    static hailo_status munmap_dma_buffer_read(hailo_dma_buffer_t dma_buffer, MemoryView dma_buffer_memview);
+
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_DMA_BUFFER_UTILS_HPP_ */
diff --git a/hailort/libhailort/src/utils/exported_resource_manager.hpp b/hailort/libhailort/src/utils/exported_resource_manager.hpp
index a4d2d5df..5a59e241 100644
--- a/hailort/libhailort/src/utils/exported_resource_manager.hpp
+++ b/hailort/libhailort/src/utils/exported_resource_manager.hpp
@@ -11,8 +11,11 @@
 #define _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_

 #include "hailo/hailort.h"
+#include "common/logger_macros.hpp"
+#include "common/utils.hpp"

 #include
+#include

 namespace hailort
 {
@@ -89,6 +92,62 @@ class ExportedResourceManager final
     std::unordered_map m_storage;
 };

+template <typename Resource, typename Key, typename KeyHash = std::hash<Key>>
+class RegisteredResource final {
+public:
+    using Manager = ExportedResourceManager<Resource, Key, KeyHash>;
+
+    static Expected<RegisteredResource> create(const Resource &resource, const Key &key)
+    {
+        hailo_status status = HAILO_UNINITIALIZED;
+        RegisteredResource registered_resource(resource, key, status);
+        if (HAILO_SUCCESS != status) {
+            LOGGER__TRACE("Resource registration failed with status {}", status);
+            return make_unexpected(status);
+        }
+        return registered_resource;
+    }
+
+    RegisteredResource(const Resource &resource, const Key &key, hailo_status &status) :
+        m_key(key)
+    {
+        status = Manager::register_resource(resource, key);
+        if (HAILO_SUCCESS != status) {
+            return;
+        }
+        m_should_release = true;
+        status = HAILO_SUCCESS;
+    }
+
+    ~RegisteredResource()
+    {
+        if (m_should_release) {
+            Manager::unregister_resource(m_key);
+        }
+    }
+
+    RegisteredResource(const RegisteredResource &) = delete;
+    RegisteredResource& operator=(const RegisteredResource &) = delete;
+
+    RegisteredResource(RegisteredResource &&other) :
+        m_key(other.m_key),
m_should_release(std::exchange(other.m_should_release, false)) + {} + + RegisteredResource& operator=(RegisteredResource &&other) + { + if (this != &other) { + m_key = other.m_key; + m_should_release = std::exchange(other.m_should_release, false); + } + return *this; + } + +private: + Key m_key; + bool m_should_release = false; +}; + } /* namespace hailort */ #endif /* _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_ */ diff --git a/hailort/libhailort/src/utils/hailort_common.cpp b/hailort/libhailort/src/utils/hailort_common.cpp index d908e871..ddca83a3 100644 --- a/hailort/libhailort/src/utils/hailort_common.cpp +++ b/hailort/libhailort/src/utils/hailort_common.cpp @@ -15,7 +15,6 @@ namespace hailort // Needed for the linker const uint32_t HailoRTCommon::BBOX_PARAMS; -const uint32_t HailoRTCommon::MASK_PARAMS; const uint32_t HailoRTCommon::MAX_DEFUSED_LAYER_COUNT; const size_t HailoRTCommon::HW_DATA_ALIGNMENT; const uint32_t HailoRTCommon::MAX_NMS_BURST_SIZE; @@ -49,13 +48,13 @@ Expected> HailoRTCommon::to_device_ids_vector(con uint32_t HailoRTCommon::get_nms_host_frame_size(const hailo_nms_shape_t &nms_shape, const hailo_format_t &format) { - auto shape_size = 0; + double frame_size = 0; if (HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == format.order) { - shape_size = get_nms_with_byte_mask_host_shape_size(nms_shape, format); + frame_size = get_nms_with_byte_mask_host_frame_size(nms_shape); } else { - shape_size = get_nms_host_shape_size(nms_shape); + auto shape_size = get_nms_host_shape_size(nms_shape); + frame_size = shape_size * get_format_data_bytes(format); } - double frame_size = shape_size * get_format_data_bytes(format); if (frame_size < UINT32_MAX) { return static_cast(frame_size); } else{ @@ -64,4 +63,49 @@ uint32_t HailoRTCommon::get_nms_host_frame_size(const hailo_nms_shape_t &nms_sha } } +Expected HailoRTCommon::as_hailo_pix_buffer(MemoryView &memory_view, hailo_format_order_t order) +{ + switch(order){ + case HAILO_FORMAT_ORDER_NV12: + case HAILO_FORMAT_ORDER_NV21: { + CHECK_AS_EXPECTED(0 == (memory_view.size() % 3), HAILO_INVALID_ARGUMENT, "buffer size must be divisible by 3"); + auto y_plane_size = memory_view.size() * 2 / 3; + auto uv_plane_size = memory_view.size() * 1 / 3; + + auto uv_data_ptr = reinterpret_cast(memory_view.data()) + y_plane_size; + + hailo_pix_buffer_plane_t y {uint32_t(y_plane_size), uint32_t(y_plane_size), {memory_view.data()}}; + hailo_pix_buffer_plane_t uv {uint32_t(uv_plane_size), uint32_t(uv_plane_size), {uv_data_ptr}}; + // Currently only support HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR + hailo_pix_buffer_t buffer{0, {y, uv}, NUMBER_OF_PLANES_NV12_NV21, HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR}; + + return buffer; + } + case HAILO_FORMAT_ORDER_I420: { + CHECK_AS_EXPECTED(0 == (memory_view.size() % 6), HAILO_INVALID_ARGUMENT, "buffer size must be divisible by 6"); + + auto y_plane_size = memory_view.size() * 2 / 3; + auto u_plane_size = memory_view.size() * 1 / 6; + auto v_plane_size = memory_view.size() * 1 / 6; + + auto u_data_ptr = (char*)memory_view.data() + y_plane_size; + auto v_data_ptr = u_data_ptr + u_plane_size; + + hailo_pix_buffer_plane_t y {uint32_t(y_plane_size), uint32_t(y_plane_size), {memory_view.data()}}; + hailo_pix_buffer_plane_t u {uint32_t(u_plane_size), uint32_t(u_plane_size), {u_data_ptr}}; + hailo_pix_buffer_plane_t v {uint32_t(v_plane_size), uint32_t(v_plane_size), {v_data_ptr}}; + // Currently only support HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR + hailo_pix_buffer_t buffer{0, {y, u, v}, NUMBER_OF_PLANES_I420, 
HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR}; + + return buffer; + } + default: { + hailo_pix_buffer_plane_t plane = {(uint32_t)memory_view.size(), (uint32_t)memory_view.size(), {memory_view.data()}}; + // Currently only support HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR + hailo_pix_buffer_t buffer{0, {plane}, 1, HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR}; + return buffer; + } + } +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/measurement_utils.cpp b/hailort/libhailort/src/utils/measurement_utils.cpp new file mode 100644 index 00000000..b958eaaa --- /dev/null +++ b/hailort/libhailort/src/utils/measurement_utils.cpp @@ -0,0 +1,212 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file measurement_utils.cpp + * @brief Measurement utils module implementation + **/ + +#include "hailo/hailort.h" +#include "measurement_utils.hpp" + + +namespace hailort { +namespace utils { + +hailo_status MeasurementStorage::add_measurement(const std::string &accumulator_name, MeasurementType type, + double measurement) +{ + return get_instance().add_measurement_impl(accumulator_name, type, measurement); +} + +Expected MeasurementStorage::get_measurements(MeasurementType type, const std::string &accumulator_name) +{ + return get_instance().get_measurements_impl(type, accumulator_name); +} + +void MeasurementStorage::set_verbosity(bool verbosity) +{ + return get_instance().set_verbosity_impl(verbosity); +} + +void MeasurementStorage::set_precision(uint32_t precision) +{ + return get_instance().set_precision_impl(precision); +} + +void MeasurementStorage::clear() +{ + return get_instance().clear_impl(); +} + +void MeasurementStorage::show_output_on_destruction(bool show_output) +{ + return get_instance().show_output_on_destruction_impl(show_output); +} + +MeasurementStorage::~MeasurementStorage() +{ + if (!m_show_output_on_destruction) { + return; + } + + // Since MeasurementStorage has only one static instance, the following will be printed on program shutdown + std::cout << "**** MEASUREMENT UTIL RESULTS ****\n"; + format_measurements(std::cout, MeasurementType::TIME); + format_measurements(std::cout, MeasurementType::FPS); + format_measurements(std::cout, MeasurementType::VALUE); +} + +MeasurementStorage& MeasurementStorage::get_instance() +{ + static MeasurementStorage instance; + return instance; +} + +std::string MeasurementStorage::indent_string(const std::string &str, uint8_t indent_level) +{ + static const std::string INDENT = " "; + + std::stringstream stream; + for (auto i = 0; i < indent_level; i++) { + stream << INDENT; + } + + stream << str; + return stream.str(); +} + +MeasurementStorage::AccumulatorMap &MeasurementStorage::get_storage(MeasurementType type) +{ + switch (type) + { + case MeasurementType::TIME: + return m_time_acc_storage; + case MeasurementType::FPS: + return m_fps_acc_storage; + case MeasurementType::VALUE: + return m_value_acc_storage; + default: + // We should never get here, we'll return the time storage to avoid a crash + LOGGER__ERROR("Invalid measurement type"); + return m_time_acc_storage; + } +} + +std::vector> MeasurementStorage::get_sorted_elements(MeasurementType type) +{ + // Storage is unordered in order to be as fast as possible in add_measurement + // We now copy the elements to a vector and sort in order to get the most readable results + // Note that we return a snapshot of the storage elements, and after this function returns the storage may 
change + std::vector> sorted_accumulator_name_pairs; + { + auto &storage = get_storage(type); + std::lock_guard lock_guard(storage.mutex); + + sorted_accumulator_name_pairs.reserve(storage.map.size()); + sorted_accumulator_name_pairs.insert(sorted_accumulator_name_pairs.end(), storage.map.cbegin(), storage.map.cend()); + } + std::sort(sorted_accumulator_name_pairs.begin(), sorted_accumulator_name_pairs.end()); + + return sorted_accumulator_name_pairs; +} + +std::string MeasurementStorage::get_measurement_title(MeasurementType type) +{ + switch (type) + { + case MeasurementType::TIME: + return "Time measurements (ms)"; + case MeasurementType::FPS: + return "FPS measurements"; + case MeasurementType::VALUE: + return "Value measurements"; + default: + // We should never get here + LOGGER__ERROR("Invalid measurement type"); + return "Invalid measurement type"; + } +} + +void MeasurementStorage::format_measurements(std::ostream &output_stream, MeasurementType type) +{ + static const std::string LIST_MARKER = "- "; + + const auto sorted_elements = get_sorted_elements(type); + + output_stream << indent_string(LIST_MARKER, 1) + << get_measurement_title(type) << ": "; + if (sorted_elements.empty()) { + output_stream << "No measurements"; + } + output_stream << "\n"; + + for (const auto &accumulator_name_pair : sorted_elements) { + const auto &accumulator_name = accumulator_name_pair.first; + const auto &accumulator_results = accumulator_name_pair.second->get(); + output_stream << indent_string(LIST_MARKER, 2) << accumulator_name << ": " + << AccumulatorResultsHelper::format_results(accumulator_results, m_verbose, m_precision) << "\n"; + } +} + +hailo_status MeasurementStorage::add_measurement_impl(const std::string &accumulator_name, MeasurementType type, + double measurement) +{ + auto &storage = get_storage(type); + std::lock_guard lock_guard(storage.mutex); + + auto it = storage.map.find(accumulator_name); + if (it == storage.map.end()) { + AccumulatorPtr accumulator = nullptr; + if (MeasurementType::FPS == type) { + accumulator = make_shared_nothrow>(accumulator_name); + } else { + accumulator = make_shared_nothrow>(accumulator_name); + } + CHECK_NOT_NULL(accumulator, HAILO_OUT_OF_HOST_MEMORY); + storage.map[accumulator_name] = accumulator; + } + + storage.map[accumulator_name]->add_data_point(measurement); + return HAILO_SUCCESS; +} + +Expected MeasurementStorage::get_measurements_impl(MeasurementType type, const std::string &accumulator_name) +{ + auto &storage = get_storage(type); + std::lock_guard lock_guard(storage.mutex); + + auto it = storage.map.find(accumulator_name); + CHECK(it != storage.map.end(), HAILO_NOT_FOUND); + + return it->second->get(); +} + +void MeasurementStorage::set_verbosity_impl(bool verbosity) +{ + m_verbose = verbosity; +} + +void MeasurementStorage::set_precision_impl(uint32_t precision) +{ + m_precision = precision; +} + +void MeasurementStorage::clear_impl() +{ + // Note: After a certain storage is cleared, it could be filled again with new measurements + // We lock to avoid race conditions for a given map, not to make this function "atomic" + for (auto &storage : {&m_time_acc_storage, &m_fps_acc_storage, &m_value_acc_storage}) { + std::lock_guard lock_guard(storage->mutex); + storage->map.clear(); + } +} + +void MeasurementStorage::show_output_on_destruction_impl(bool show_output) +{ + m_show_output_on_destruction = show_output; +} + +} /* namespace utils */ +} /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/measurement_utils.hpp 
b/hailort/libhailort/src/utils/measurement_utils.hpp
new file mode 100644
index 00000000..ae9e3fbf
--- /dev/null
+++ b/hailort/libhailort/src/utils/measurement_utils.hpp
@@ -0,0 +1,174 @@
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file measurement_utils.hpp
+ * @brief This module provides utility classes for measuring and storing runtime statistics of designated code
+ * blocks/functions.
+ * Three classes are provided for measurements:
+ * 1) utils::MeasureTime - measures the execution time of the scope in which it's declared
+ * 2) utils::MeasureFps - measures the fps of the scope in which it's declared
+ * 3) utils::MeasureValue - measures a numeric value
+ *
+ * Usage:
+ * 1) To measure the running time of a certain function, declare an instance of utils::MeasureTime at the start
+ * of the function. E.g.
+ * 1  hailo_status BoundaryChannel::inc_num_available(uint16_t value)
+ * 2  {
+ * 3      utils::MeasureTime time("inc_num_available on channel_id={}", m_channel_id.channel_index);
+ * 4      // ...
+ * 5      return m_host_registers.set_num_available(static_cast<uint16_t>(num_available));
+ * 6  }
+ * The MEASURE_TIME macro can be used to simplify the declaration of MeasureTime instances. E.g.
+ * Replace line 3 in the above example with:
+ * MEASURE_TIME("inc_num_available on channel_id={}", m_channel_id.channel_index);
+ * 2) To measure the FPS of a certain function use utils::MeasureFps or the MEASURE_FPS macro.
+ * The usage is the same as utils::MeasureTime/MEASURE_TIME.
+ * 3) In some cases we'll want to measure only the performance-critical section of the function. In this case,
+ * open a new scope surrounding this section, and declare an instance of MeasureTime at the start of it. E.g.
+ * 1  hailo_status BoundaryChannel::prepare_descriptors(..., MappedBufferPtr mapped_buffer, ...)
+ * 2  {
+ * 3      if (mapped_buffer != nullptr) {
+ * 4          // Code that we don't want to measure...
+ * 5          if (!is_buffer_already_configured(mapped_buffer, buffer_offset_in_descs, starting_desc)) {
+ * 6              // More code that we don't want to measure...
+ * 7              {
+ * 8                  // We wrapped configure_to_use_buffer with a new scope, because we only want to measure it
+ * 9                  // (originally it wasn't in its own scope)
+ * 10                 utils::MeasureTime time("configure_to_use_buffer on channel_id={}", m_channel_id.channel_index);
+ * 11                 auto status = m_desc_list->configure_to_use_buffer(*mapped_buffer, m_channel_id, configure_starting_desc);
+ * 12                 CHECK_SUCCESS(status);
+ * 13             }
+ * 14         }
+ * 15     }
+ * 16     // More code...
+ * 17     return HAILO_SUCCESS;
+ * 18 }
+ * Again, the MEASURE_TIME macro can be used in place of the MeasureTime declaration.
+ * 4) To measure the FPS of a certain section use utils::MeasureFps or MEASURE_FPS.
+ * The usage is the same as utils::MeasureTime/MEASURE_TIME.
+ * 5) To measure a numeric value, use the MEASURE_VALUE macro. E.g.
+ * 1  hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id)
+ * 2  {
+ * 3      // ...
+ * 4      auto hw_batch_size = scheduled_core_op->use_dynamic_batch_flow() ? frames_count : SINGLE_CONTEXT_BATCH_SIZE;
+ * 5      MEASURE_VALUE(hw_batch_size, "core_op_handle={}", core_op_handle);
+ * 6      // ...
+ * 7  }
+ * The MEASURE_VALUE macro simplifies the declaration of MeasureValue instances (the class that implements
+ * the measurement logic), and its usage is preferred. The macro will use the stringified variable name as
+ * the prefix for the accumulator name.
+ * E.g. for core_op_handle=0 the accumulator name will be "hw_batch_size (core_op_handle=0)".
+ * 6) Be sure to provide a descriptive name for each measurement. In the above examples, channel_id was used in
+ * order to differentiate between set_num_available/configure_to_use_buffer on different channels.
+ * 7) At the end of the program's execution, the measurements will be printed to stdout. For example, given the
+ * measurements registered in the examples provided for MeasureTime, the following will be printed upon
+ * hailortcli's completion:
+ * $ hailortcli run2 -m raw_async set-net shortcut_net_1080_1920_3.hef
+ * [===================>] 100% 00:00:00
+ * shortcut_net: fps: 255.72
+ * **** MEASUREMENT UTIL RESULTS ****
+ * - Time measurements (ms):
+ *   - configure_to_use_buffer on channel_id=1: count=1285, mean=0.2604
+ *   - configure_to_use_buffer on channel_id=16: count=1285, mean=0.2583
+ *   - inc_num_available on channel_id=1: count=1285, mean=0.0030
+ *   - inc_num_available on channel_id=16: count=1285, mean=0.0017
+ * - FPS measurements: No measurements
+ * - Value measurements: No measurements
+ *
+ * Important note!
+ * The module is intended for debugging performance bottlenecks. For "release-grade" performance
+ * monitoring use other classes provided in the library. For example, see references to AccumulatorPtr
+ * in the core_op modules or DurationCollector in the pipeline modules.
+ **/
+
+#ifndef _HAILO_MEASUREMENT_UTILS_HPP_
+#define _HAILO_MEASUREMENT_UTILS_HPP_
+
+#include "measurement_utils_internal.hpp"
+#include
+
+namespace hailort {
+namespace utils {
+
+// Measures the execution time of a block/function in milliseconds
+class MeasureTime : public MeasureTimeBase<std::milli>
+{
+public:
+    MeasureTime(const std::string &accumulator_name) :
+        MeasureTimeBase::MeasureTimeBase(MeasurementType::TIME, accumulator_name)
+    {}
+
+    template <typename... Args>
+    MeasureTime(const std::string &accumulator_name_format, Args&&... args) :
+        MeasureTime(fmt::format(accumulator_name_format, std::forward<Args>(args)...))
+    {}
+};
+
+// Measures the fps of a block/function
+// Using ratio<1,1> so that time measurements will be in seconds (needed for correct fps units)
+class MeasureFps : public MeasureTimeBase<std::ratio<1, 1>>
+{
+public:
+    MeasureFps(const std::string &accumulator_name) :
+        MeasureTimeBase::MeasureTimeBase(MeasurementType::FPS, accumulator_name)
+    {}
+
+    template <typename... Args>
+    MeasureFps(const std::string &accumulator_name_format, Args&&... args) :
+        MeasureFps(fmt::format(accumulator_name_format, std::forward<Args>(args)...))
+    {}
+};
+
+// Measures a numeric value
+template <typename T, std::enable_if_t<std::is_arithmetic<T>::value, int> = 0>
+class MeasureValue : public Measure
+{
+public:
+    MeasureValue(T value, const std::string &accumulator_name) :
+        Measure::Measure(MeasurementType::VALUE, accumulator_name)
+    {
+        m_measurement = static_cast<double>(value);
+    }
+
+    template <typename... Args>
+    MeasureValue(T value, const std::string &accumulator_name_format, Args&&... args) :
+        MeasureValue(value, fmt::format(accumulator_name_format, std::forward<Args>(args)...))
+    {}
+};
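// Editorial sketch (not part of the original change): how recorded measurements might be read back
// programmatically, using the MEASURE_TIME macro defined below. MeasurementStorage, MeasurementType and
// get_measurements are declared in measurement_utils_internal.hpp; the accumulator name "my_loop" is a
// hypothetical example.
//
//     void report_loop_time()
//     {
//         for (int i = 0; i < 100; i++) {
//             MEASURE_TIME("my_loop"); // one data point is added when the instance goes out of scope
//             // ... the work being timed ...
//         }
//         auto results = MeasurementStorage::get_measurements(MeasurementType::TIME, "my_loop");
//         if (results) {
//             // results.value() is an AccumulatorResults snapshot (count, mean, etc.)
//         }
//     }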
+
+// TODO: The helper macros are only available for GCC because of ##__VA_ARGS__ support (HRT-13031)
+#ifdef __GNUC__
+#define _CONCAT_HELPER(x, y) x##y
+#define _CONCAT(x, y) _CONCAT_HELPER(x, y)
+
+// Helper macro for measuring the execution time of a block/function
+// Note: An instance with a unique name will be created (__time_<line>), so that:
+// a) the measurements will be completed at the end of the scope
+// b) name shadowing will be avoided
+#define MEASURE_TIME(accumulator_name_format, ...) \
+    hailort::utils::MeasureTime _CONCAT(__time_, __LINE__)(accumulator_name_format, ##__VA_ARGS__)
+
+// Helper macro for measuring fps of a block/function
+// Note: An instance with a unique name will be created (__time_<line>), so that:
+// a) the measurements will be completed at the end of the scope
+// b) name shadowing will be avoided
+#define MEASURE_FPS(accumulator_name_format, ...) \
+    hailort::utils::MeasureFps _CONCAT(__time_, __LINE__)(accumulator_name_format, ##__VA_ARGS__)
+
+// Helper macro for measuring a numeric value
+// Note: The accumulator's name is the stringified variable name together with accumulator_name_format.
+// E.g. calling MEASURE_VALUE(hw_batch_size, "core_op_handle={}", core_op_handle) with core_op_handle=0 will
+// yield the accumulator name "hw_batch_size (core_op_handle=0)".
+// Note: The MeasureValue instances created here are temporary. Unlike MeasureTime and MeasureFps,
+// we measure the value right away and not at the end of a scope.
+#define MEASURE_VALUE(value, accumulator_name_format, ...) \
+    hailort::utils::MeasureValue((value), #value " (" accumulator_name_format ")", ##__VA_ARGS__)
+
+#endif /* __GNUC__ */
+
+} /* namespace utils */
+} /* namespace hailort */
+
+#endif /* _HAILO_MEASUREMENT_UTILS_HPP_ */
diff --git a/hailort/libhailort/src/utils/measurement_utils_internal.hpp b/hailort/libhailort/src/utils/measurement_utils_internal.hpp
new file mode 100644
index 00000000..9ca28748
--- /dev/null
+++ b/hailort/libhailort/src/utils/measurement_utils_internal.hpp
@@ -0,0 +1,137 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file measurement_utils_internal.hpp + * @brief Internal class definitions for the measurement_utils module + **/ + +#ifndef _HAILO_MEASUREMENT_UTILS_INTERNAL_HPP_ +#define _HAILO_MEASUREMENT_UTILS_INTERNAL_HPP_ + +#include "hailo/hailort.h" +#include "utils/hailort_logger.hpp" +#include "common/runtime_statistics_internal.hpp" + +#include +#include +#include +#include + + +namespace hailort { +namespace utils { + +enum class MeasurementType +{ + TIME, + FPS, + VALUE +}; + +class MeasurementStorage final +{ +public: + // Adds a 'type' measurement to the 'accumulator_name' accumulator; thread-safe + static hailo_status add_measurement(const std::string &accumulator_name, MeasurementType type, double measurement); + static Expected get_measurements(MeasurementType type, const std::string &accumulator_name); + // Not thread-safe + static void set_verbosity(bool verbosity); + static void set_precision(uint32_t precision); + static void clear(); + static void show_output_on_destruction(bool show_output); + + ~MeasurementStorage(); + +private: + struct AccumulatorMap { + std::mutex mutex; + std::unordered_map map; + }; + + static MeasurementStorage& get_instance(); + static std::string indent_string(const std::string &str, uint8_t indent_level); + + AccumulatorMap &get_storage(MeasurementType type); + std::vector> get_sorted_elements(MeasurementType type); + std::string get_measurement_title(MeasurementType type); + void format_measurements(std::ostream &output_stream, MeasurementType type); + hailo_status add_measurement_impl(const std::string &accumulator_name, MeasurementType type, double measurement); + Expected get_measurements_impl(MeasurementType type, const std::string &accumulator_name); + void set_verbosity_impl(bool verbosity); + void set_precision_impl(uint32_t precision); + void clear_impl(); + void show_output_on_destruction_impl(bool show_output); + + bool m_verbose = false; + uint32_t m_precision = AccumulatorResultsHelper::DEFAULT_FLOATING_POINT_PRECISION; + bool m_show_output_on_destruction = true; + AccumulatorMap m_time_acc_storage; + AccumulatorMap m_fps_acc_storage; + AccumulatorMap m_value_acc_storage; +}; + +class Measure +{ +public: + virtual ~Measure() + { + const auto status = MeasurementStorage::add_measurement(m_accumulator_name, m_type, m_measurement); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed adding data point to {}", m_accumulator_name); + } + } + + Measure(Measure &&) = delete; + Measure(const Measure &) = delete; + Measure &operator=(Measure &&) = delete; + Measure &operator=(const Measure &) = delete; + +protected: + // The measurement will be added to the accumulator named m_accumulator_name in the dtor + double m_measurement; + + Measure(MeasurementType type, const std::string &accumulator_name) : + m_measurement(), + m_type(type), + m_accumulator_name(accumulator_name) + {} + +private: + const MeasurementType m_type; + const std::string m_accumulator_name; +}; + +template +class MeasureTimeBase : public Measure +{ +public: + virtual ~MeasureTimeBase() + { + // Set the measurement to the time delta + m_measurement = convert_to_double(std::chrono::steady_clock::now() - m_start_time); + } + +protected: + MeasureTimeBase(MeasurementType type, const std::string &accumulator_name) : + Measure::Measure(type, accumulator_name), + m_start_time(std::chrono::steady_clock::now()) + {} + +private: + using time_point = decltype(std::chrono::steady_clock::now()); + + 
static double convert_to_double(std::chrono::nanoseconds time_in_ns) + { + return std::chrono::duration(time_in_ns).count(); + } + + // Must be the last member declared, so that the time will be measured correctly + const time_point m_start_time; +}; + +} /* namespace utils */ +} /* namespace hailort */ + +#endif /* _HAILO_MEASUREMENT_UTILS_INTERNAL_HPP_ */ diff --git a/hailort/libhailort/src/utils/profiler/handler.hpp b/hailort/libhailort/src/utils/profiler/handler.hpp index 406a8114..7b82ef6f 100644 --- a/hailort/libhailort/src/utils/profiler/handler.hpp +++ b/hailort/libhailort/src/utils/profiler/handler.hpp @@ -52,10 +52,20 @@ struct AddDeviceTrace : Trace struct MonitorStartTrace : Trace { - MonitorStartTrace() - : Trace("scheduler_start") + MonitorStartTrace(const std::string &unique_vdevice_hash) + : Trace("scheduler_start"), unique_vdevice_hash(unique_vdevice_hash) {} + std::string unique_vdevice_hash; +}; + +struct MonitorEndTrace : Trace +{ + MonitorEndTrace(const std::string &unique_vdevice_hash) + : Trace("scheduler_end"), unique_vdevice_hash(unique_vdevice_hash) + {} + + std::string unique_vdevice_hash; }; struct AddCoreOpTrace : Trace @@ -145,14 +155,30 @@ struct FrameEnqueueD2HTrace : Trace std::string queue_name; }; -struct SwitchCoreOpTrace : Trace +struct ActivateCoreOpTrace : Trace { - SwitchCoreOpTrace(const device_id_t &device_id, scheduler_core_op_handle_t handle) - : Trace("switch_core_op"), device_id(device_id), core_op_handle(handle) + ActivateCoreOpTrace(const device_id_t &device_id, vdevice_core_op_handle_t handle, double duration) + : Trace("activate_core_op"), device_id(device_id), core_op_handle(handle), duration(duration) {} device_id_t device_id; - scheduler_core_op_handle_t core_op_handle; + vdevice_core_op_handle_t core_op_handle; + double duration; +}; + +// Currently, activate and switch are the same trace to make scheduler and fast-switch flow similar (although in the +// scheduler we have no deactivate). 
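// Editorial sketch (an assumption, not taken from this diff): with the alias below, existing scheduler
// call sites keep constructing SwitchCoreOpTrace unchanged, while fast-switch code can name the trace
// explicitly, e.g.:
//     ActivateCoreOpTrace trace(device_id, core_op_handle, /*duration=*/0.35);
// Both spellings create the same trace type; the `duration` field (a plain double in this diff) is the
// new piece of information.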
+using SwitchCoreOpTrace = ActivateCoreOpTrace; + +struct DeactivateCoreOpTrace : Trace +{ + DeactivateCoreOpTrace(const device_id_t &device_id, vdevice_core_op_handle_t handle, double duration) + : Trace("deactivate_core_op"), device_id(device_id), core_op_handle(handle), duration(duration) + {} + + device_id_t device_id; + vdevice_core_op_handle_t core_op_handle; + double duration; }; struct SetCoreOpTimeoutTrace : Trace @@ -200,6 +226,19 @@ struct OracleDecisionTrace : Trace bool over_timeout; }; +struct HefLoadedTrace : Trace +{ + HefLoadedTrace(const std::string &hef_name, const std::string &dfc_version, const unsigned char *md5_hash) + : Trace("hef_loaded"), hef_name(hef_name), dfc_version(dfc_version) + { + std::memcpy(this->md5_hash, md5_hash, MD5_DIGEST_LENGTH); + } + + std::string hef_name; + std::string dfc_version; + MD5_SUM_t md5_hash; +}; + struct DumpProfilerStateTrace : Trace { DumpProfilerStateTrace() : Trace("dump_profiler_state") {} @@ -218,8 +257,10 @@ class Handler virtual void handle_trace(const FrameDequeueH2DTrace&) {}; virtual void handle_trace(const FrameDequeueD2HTrace&) {}; virtual void handle_trace(const FrameEnqueueD2HTrace&) {}; - virtual void handle_trace(const SwitchCoreOpTrace&) {}; + virtual void handle_trace(const ActivateCoreOpTrace&) {}; + virtual void handle_trace(const DeactivateCoreOpTrace&) {}; virtual void handle_trace(const MonitorStartTrace&) {}; + virtual void handle_trace(const MonitorEndTrace&) {}; virtual void handle_trace(const AddDeviceTrace&) {}; virtual void handle_trace(const SetCoreOpTimeoutTrace&) {}; virtual void handle_trace(const SetCoreOpThresholdTrace&) {}; @@ -227,6 +268,7 @@ class Handler virtual void handle_trace(const OracleDecisionTrace&) {}; virtual void handle_trace(const DumpProfilerStateTrace&) {}; virtual void handle_trace(const InitProfilerProtoTrace&) {}; + virtual void handle_trace(const HefLoadedTrace&) {}; }; diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp index 25d6c72b..598bb905 100644 --- a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp +++ b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp @@ -37,8 +37,14 @@ void MonitorHandler::clear_monitor() { void MonitorHandler::handle_trace(const MonitorStartTrace &trace) { - (void)trace; - start_mon(); + start_mon(trace.unique_vdevice_hash); +} + +void MonitorHandler::handle_trace(const MonitorEndTrace &trace) +{ + if (m_unique_vdevice_hash == trace.unique_vdevice_hash) { + m_unique_vdevice_hash = {}; + } } void MonitorHandler::handle_trace(const AddCoreOpTrace &trace) @@ -53,19 +59,19 @@ void MonitorHandler::handle_trace(const AddDeviceTrace &trace) m_devices_info.emplace(trace.device_id, device_info); } -void MonitorHandler::handle_trace(const SwitchCoreOpTrace &trace) +void MonitorHandler::handle_trace(const ActivateCoreOpTrace &trace) { // TODO: 'if' should be removed, this is temporary solution since this trace is called out of the scheduler or vdevice. 
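// Editorial note: the guarded early returns below (used throughout this file) replace the old hard
// asserts, since a trace may reference a device or core-op that this monitor instance doesn't track:
//     if (!contains(m_devices_info, trace.device_id)) { return; }
// Proper multi-vdevice bookkeeping is deferred to HRT-8835.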
if (!m_is_monitor_currently_working) { return; } - assert(contains(m_devices_info, trace.device_id)); + if (!contains(m_devices_info, trace.device_id)) { return; } // TODO (HRT-8835): Support multiple vdevices m_devices_info.at(trace.device_id).current_core_op_handle = trace.core_op_handle; } void MonitorHandler::handle_trace(const AddStreamH2DTrace &trace) { auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name); - assert(contains(m_core_ops_info, core_op_handle)); - assert(contains(m_devices_info, trace.device_id)); + if (!contains(m_core_ops_info, core_op_handle)) { return; } // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info, trace.device_id)) { return; } // TODO (HRT-8835): Support multiple vdevices m_core_ops_info[core_op_handle].input_streams_info[trace.stream_name] = StreamsInfo{trace.queue_size}; if (!contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, core_op_handle)) { m_devices_info.at(trace.device_id).requested_transferred_frames_h2d.emplace(core_op_handle, make_shared_nothrow()); @@ -76,8 +82,8 @@ void MonitorHandler::handle_trace(const AddStreamH2DTrace &trace) void MonitorHandler::handle_trace(const AddStreamD2HTrace &trace) { auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name); - assert(contains(m_core_ops_info, core_op_handle)); - assert(contains(m_devices_info, trace.device_id)); + if (!contains(m_core_ops_info, core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info, trace.device_id)) { return ;} // TODO (HRT-8835): Support multiple vdevices m_core_ops_info[core_op_handle].output_streams_info[trace.stream_name] = StreamsInfo{trace.queue_size}; if (!contains(m_devices_info.at(trace.device_id).finished_transferred_frames_d2h, core_op_handle)) { m_devices_info.at(trace.device_id).finished_transferred_frames_d2h.emplace(core_op_handle, make_shared_nothrow()); @@ -87,8 +93,8 @@ void MonitorHandler::handle_trace(const AddStreamD2HTrace &trace) void MonitorHandler::handle_trace(const FrameEnqueueH2DTrace &trace) { - assert(contains(m_core_ops_info, trace.core_op_handle)); - assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)); + if (!contains(m_core_ops_info, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)) { return ;} // TODO (HRT-8835): Support multiple vdevices auto &queue = m_core_ops_info[trace.core_op_handle].input_streams_info[trace.queue_name]; queue.pending_frames_count->fetch_add(1); queue.pending_frames_count_acc->add_data_point(queue.pending_frames_count->load()); @@ -96,8 +102,8 @@ void MonitorHandler::handle_trace(const FrameEnqueueH2DTrace &trace) void MonitorHandler::handle_trace(const FrameDequeueD2HTrace &trace) { - assert(contains(m_core_ops_info, trace.core_op_handle)); - assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)); + if (!contains(m_core_ops_info, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)) { return ;} // TODO (HRT-8835): Support multiple vdevices auto &queue = m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name]; queue.pending_frames_count->fetch_sub(1); queue.pending_frames_count_acc->add_data_point(queue.pending_frames_count->load()); @@ -108,11 +114,11 @@ void 
MonitorHandler::handle_trace(const FrameEnqueueD2HTrace &trace) { // TODO: 'if' should be removed, this is temporary solution since this trace is called out of the scheduler or vdevice. if (!m_is_monitor_currently_working) { return; } - assert(contains(m_core_ops_info, trace.core_op_handle)); - assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)); + if (!contains(m_core_ops_info, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)) { return ;} // TODO (HRT-8835): Support multiple vdevices - assert(contains(m_devices_info, trace.device_id)); - assert(contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, trace.core_op_handle)); + if (!contains(m_devices_info, trace.device_id)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices auto &queue = m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name]; queue.pending_frames_count->fetch_add(1); @@ -131,10 +137,10 @@ void MonitorHandler::handle_trace(const FrameDequeueH2DTrace &trace) { // TODO: 'if' should be removed, this is temporary solution since this trace is called out of the scheduler or vdevice. if (!m_is_monitor_currently_working) { return; } - assert(contains(m_core_ops_info, trace.core_op_handle)); - assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)); - assert(contains(m_devices_info, trace.device_id)); - assert(contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, trace.core_op_handle)); + if (!contains(m_core_ops_info, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info, trace.device_id)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices auto &queue = m_core_ops_info[trace.core_op_handle].input_streams_info[trace.queue_name]; queue.pending_frames_count->fetch_sub(1); @@ -155,15 +161,21 @@ scheduler_core_op_handle_t MonitorHandler::get_core_op_handle_by_name(const std: return INVALID_CORE_OP_HANDLE; } -hailo_status MonitorHandler::start_mon() +hailo_status MonitorHandler::start_mon(const std::string &unique_vdevice_hash) { #if defined(__GNUC__) /* Clearing monitor members. Since the owner of monitor_handler is tracer, which is static, the monitor may get rerun without destructor being called. */ if (m_is_monitor_currently_working) { + if (!m_unique_vdevice_hash.empty() && (unique_vdevice_hash != m_unique_vdevice_hash)) { + LOGGER__WARNING("Trying to register a vdevice to hailo-monitor, "\ + "while other vdevice is registered. 
Monitor currently supports a single vdevice, which will result in inconsistent tracing.");
+            return HAILO_INVALID_OPERATION;
+        }
        clear_monitor();
    }
+    m_unique_vdevice_hash = unique_vdevice_hash;
    m_is_monitor_currently_working = true;
    auto event_exp = Event::create_shared(Event::State::not_signalled);
@@ -193,6 +205,7 @@ hailo_status MonitorHandler::start_mon()
     return HAILO_SUCCESS;
 #else
+    (void)unique_vdevice_hash;
     return HAILO_NOT_IMPLEMENTED;
 #endif
 }
diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
index a62498db..5ae124de 100644
--- a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
+++ b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
@@ -31,6 +31,7 @@
 #pragma warning(disable: 4244 4267 4127)
 #else
 #pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
 #pragma GCC diagnostic ignored "-Wconversion"
 #endif
 #include "scheduler_mon.pb.h"
@@ -59,25 +60,25 @@ class SchedulerCounter
     void insert(const stream_name_t &name)
     {
-        assert(!contains(m_map, name));
+        if (contains(m_map, name)) { return; } // TODO (HRT-8835): Support multiple vdevices
         m_map[name] = 0;
     }

     uint32_t operator[](const stream_name_t &name) const
     {
-        assert(contains(m_map, name));
+        if (!contains(m_map, name)) { return 0; } // TODO (HRT-8835): Support multiple vdevices
         return m_map.at(name);
     }

     void increase(const stream_name_t &name)
     {
-        assert(contains(m_map, name));
+        if (!contains(m_map, name)) { return; } // TODO (HRT-8835): Support multiple vdevices
         m_map[name]++;
     }

     void decrease(const stream_name_t &name)
     {
-        assert(contains(m_map, name));
+        if (!contains(m_map, name)) { return; } // TODO (HRT-8835): Support multiple vdevices
         assert(m_map[name] > 0);
         m_map[name]--;
     }
@@ -171,12 +172,13 @@ class MonitorHandler : public Handler
     virtual void handle_trace(const FrameDequeueD2HTrace&) override;
     virtual void handle_trace(const FrameDequeueH2DTrace&) override;
     virtual void handle_trace(const FrameEnqueueD2HTrace&) override;
-    virtual void handle_trace(const SwitchCoreOpTrace&) override;
+    virtual void handle_trace(const ActivateCoreOpTrace&) override;
     virtual void handle_trace(const MonitorStartTrace&) override;
+    virtual void handle_trace(const MonitorEndTrace&) override;
     virtual void handle_trace(const AddDeviceTrace&) override;

 private:
-    hailo_status start_mon();
+    hailo_status start_mon(const std::string &unique_vdevice_hash);
 #if defined(__GNUC__)
     Expected> open_temp_mon_file();
     void dump_state();
@@ -204,6 +206,7 @@ class MonitorHandler : public Handler
     // TODO: Consider adding Accumulator classes for more info (min, max, mean, etc..)
     std::unordered_map m_core_ops_info;
     std::unordered_map m_devices_info;
+    std::string m_unique_vdevice_hash; // Only one vdevice is allowed at a time; the vdevice will be unregistered on its destruction.
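    // Editorial sketch of the intended lifecycle, reconstructed from the handlers above (not stated
    // explicitly in the diff):
    //     MonitorStartTrace{hash}  -> start_mon(hash) stores m_unique_vdevice_hash and starts the monitor
    //     MonitorStartTrace{hash2} -> rejected with HAILO_INVALID_OPERATION while the first hash is registered
    //     MonitorEndTrace{hash}    -> clears m_unique_vdevice_hash, letting the next vdevice register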
}; } diff --git a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp index de28bd71..daf284ed 100644 --- a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp +++ b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp @@ -19,6 +19,12 @@ namespace hailort { +#define PCIE_GEN1_SPEED "2.5GT/s" +#define PCIE_GEN2_SPEED "5GT/s" +#define PCIE_GEN3_SPEED "8GT/s" +#define PCIE_GEN4_SPEED "16GT/s" +#define PCIE_GEN5_SPEED "32GT/s" +#define PCIE_GEN6_SPEED "64GT/s" struct ProfilerTime { uint32_t year; @@ -29,6 +35,13 @@ struct ProfilerTime { int64_t time_since_epoch; }; +struct pci_info { + std::string gen; + std::string lanes; + + pci_info() : gen("N/A"), lanes("N/A") {} +}; + #if defined(__linux__) std::string os_name() { @@ -71,6 +84,79 @@ std::uint64_t system_ram_size() return sys_info.totalram; } + +std::string exec(const char *cmd) { + const int buffer_size = 128; + std::array buffer; + std::string result; + std::shared_ptr pipe(popen(cmd, "r"), pclose); + + if (!pipe) { + LOGGER__WARNING("Couldn't execute {}, popen() failed!", cmd); + return ""; + } + + while (!feof(pipe.get())) { + if (fgets(buffer.data(), buffer_size, pipe.get()) != nullptr) { + result += buffer.data(); + } + } + + return result; +} + +pci_info parse_lspci_output(const std::string &output) { + std::istringstream lspci_stream(output); + pci_info pcie_info = {}; + std::string line; + bool in_hailo_section = false; + int hailo_device_count = 0; + + while (std::getline(lspci_stream, line)) { + // Sample output line: "LnkCap: Port #0, Speed 8GT/s, Width x8, ASPM L0s L1, Exit Latency L0s <256ns, L1 <4us" + if (line.find("Co-processor: Hailo") != std::string::npos) { + in_hailo_section = true; + hailo_device_count++; + // TODO: HRT-8834/8835 Support multiple Hailo devices connected to the same host + if (1 < hailo_device_count) { + pcie_info.gen = "N/A"; + pcie_info.lanes = "N/A"; + return pcie_info; + } + } + if (!in_hailo_section) { + continue; + } + if (line.find("LnkCap") != std::string::npos) { + std::istringstream line_stream(line); + std::string token; + while (line_stream >> token) { + if ("Speed" == token) { + line_stream >> token; + if (!token.empty() && token.back() == ',') { + token.pop_back(); + } + if (PCIE_GEN1_SPEED == token) { pcie_info.gen = "1"; } + else if (PCIE_GEN2_SPEED == token) { pcie_info.gen = "2"; } + else if (PCIE_GEN3_SPEED == token) { pcie_info.gen = "3"; } + else if (PCIE_GEN4_SPEED == token) { pcie_info.gen = "4"; } + else if (PCIE_GEN5_SPEED == token) { pcie_info.gen = "5"; } + else if (PCIE_GEN6_SPEED == token) { pcie_info.gen = "6"; } + } + if ("Width" == token) { + line_stream >> token; + pcie_info.lanes = token.substr(1); + } + } + } + } + return pcie_info; +} + +pci_info get_pcie_info() { + std::string lspci_output = exec("lspci -vvv"); + return parse_lspci_output(lspci_output); +} #endif ProfilerTime get_curr_time() diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp index bc4f1710..c3c56abc 100644 --- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp @@ -143,6 +143,14 @@ void SchedulerProfilerHandler::handle_trace(const InitProfilerProtoTrace &trace) init->set_os_ver(os_ver()); init->set_cpu_arch(cpu_arch()); init->set_sys_ram_size(system_ram_size()); + if (0 == geteuid()) { + auto pcie_info = get_pcie_info(); + 
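        // Editorial note: for the sample LnkCap line quoted in parse_lspci_output above
        // ("Speed 8GT/s, Width x8"), get_pcie_info() would report gen "3" (8GT/s matches
        // PCIE_GEN3_SPEED) and take the lane count from the token following "Width". lspci prints
        // the LnkCap details only with root privileges, which is why this branch is guarded by
        // geteuid() and the else-branch below falls back to an explanatory string.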
init->mutable_pcie_info()->set_gen(pcie_info.gen); + init->mutable_pcie_info()->set_lanes(pcie_info.lanes); + } else { + init->mutable_pcie_info()->set_gen("Failed fetching info, root privilege is required"); + init->mutable_pcie_info()->set_lanes("Failed fetching info, root privilege is required"); + } #endif init->set_hailort_ver(get_libhailort_version_representation()); init->mutable_time()->set_day(curr_time.day); @@ -154,6 +162,17 @@ void SchedulerProfilerHandler::handle_trace(const InitProfilerProtoTrace &trace) init->set_time_stamp_since_epoch(curr_time.time_since_epoch); } +void SchedulerProfilerHandler::handle_trace(const HefLoadedTrace &trace) +{ + std::lock_guard lock(m_proto_lock); + + auto added_trace = m_profiler_trace_proto.add_added_trace(); + added_trace->mutable_loaded_hef()->set_hef_md5(reinterpret_cast(trace.md5_hash)); + added_trace->mutable_loaded_hef()->set_hef_name(trace.hef_name); + added_trace->mutable_loaded_hef()->set_dfc_version(trace.dfc_version); + added_trace->mutable_loaded_hef()->set_time_stamp(trace.timestamp); +} + void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace) { log(JSON({ @@ -297,7 +316,7 @@ void SchedulerProfilerHandler::handle_trace(const FrameEnqueueD2HTrace &trace) added_trace->mutable_frame_enqueue()->set_time_stamp(trace.timestamp); } -void SchedulerProfilerHandler::handle_trace(const SwitchCoreOpTrace &trace) +void SchedulerProfilerHandler::handle_trace(const ActivateCoreOpTrace &trace) { log(JSON({ {"action", json_to_string(trace.name)}, @@ -308,9 +327,20 @@ void SchedulerProfilerHandler::handle_trace(const SwitchCoreOpTrace &trace) std::lock_guard lock(m_proto_lock); auto added_trace = m_profiler_trace_proto.add_added_trace(); - added_trace->mutable_switched_core_op()->set_device_id(trace.device_id); - added_trace->mutable_switched_core_op()->set_new_core_op_handle(trace.core_op_handle); - added_trace->mutable_switched_core_op()->set_time_stamp(trace.timestamp); + added_trace->mutable_activate_core_op()->set_device_id(trace.device_id); + added_trace->mutable_activate_core_op()->set_new_core_op_handle(trace.core_op_handle); + added_trace->mutable_activate_core_op()->set_time_stamp(trace.timestamp); + added_trace->mutable_activate_core_op()->set_duration(trace.duration); +} + +void SchedulerProfilerHandler::handle_trace(const DeactivateCoreOpTrace &trace) +{ + std::lock_guard lock(m_proto_lock); + auto added_trace = m_profiler_trace_proto.add_added_trace(); + added_trace->mutable_deactivate_core_op()->set_device_id(trace.device_id); + added_trace->mutable_deactivate_core_op()->set_core_op_handle(trace.core_op_handle); + added_trace->mutable_deactivate_core_op()->set_time_stamp(trace.timestamp); + added_trace->mutable_deactivate_core_op()->set_duration(trace.duration); } void SchedulerProfilerHandler::handle_trace(const SetCoreOpTimeoutTrace &trace) diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp index 358d06f0..81924df9 100644 --- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp @@ -46,7 +46,8 @@ class SchedulerProfilerHandler : public Handler virtual void handle_trace(const FrameDequeueH2DTrace&) override; virtual void handle_trace(const FrameDequeueD2HTrace&) override; virtual void handle_trace(const FrameEnqueueD2HTrace&) override; - virtual void handle_trace(const SwitchCoreOpTrace&) override; + virtual void 
handle_trace(const ActivateCoreOpTrace&) override;
+    virtual void handle_trace(const DeactivateCoreOpTrace&) override;
     virtual void handle_trace(const AddDeviceTrace&) override;
     virtual void handle_trace(const SetCoreOpTimeoutTrace&) override;
     virtual void handle_trace(const SetCoreOpThresholdTrace&) override;
@@ -54,6 +55,7 @@ class SchedulerProfilerHandler : public Handler
     virtual void handle_trace(const OracleDecisionTrace&) override;
     virtual void handle_trace(const DumpProfilerStateTrace&) override;
     virtual void handle_trace(const InitProfilerProtoTrace&) override;
+    virtual void handle_trace(const HefLoadedTrace&) override;

 private:
     void log(JSON json);
diff --git a/hailort/libhailort/src/utils/shared_resource_manager.hpp b/hailort/libhailort/src/utils/shared_resource_manager.hpp
index afcad3a2..a40c8cb5 100644
--- a/hailort/libhailort/src/utils/shared_resource_manager.hpp
+++ b/hailort/libhailort/src/utils/shared_resource_manager.hpp
@@ -101,11 +101,15 @@ class SharedResourceManager
         : m_resources(max_resources())
     {}

-#ifdef _WIN32
-    // On windows, when the process terminates, all threads are and only then the static variable are destroyed.
-    // If the user hasn't called release_resource, we will leak its objects (since otherwise the object destructor may
-    // wait on some terminated threads and hang).
-    // Notice that on graceful cleanup m_resources should be empty.
+    // On graceful process cleanup, the destructor of this class will be called, and m_resources should be an
+    // empty list (since all resources were released). If that is not the case (for example, the user called
+    // ExitProcess), we don't want to release the objects - we just leak them. It is OK to leak the objects,
+    // since the user didn't call release_resource (so what would they expect us to do?).
+    // It is also important to leak the memory, because we may not be able to free the objects while the
+    // process is being destructed:
+    // 1. On Windows, for example, the static variables are destroyed *after* the threads are stopped.
+    //    Some shared resources wait for their threads to do something, and they can get stuck forever.
+    // 2. The destruction of one object may rely on the destruction of another singleton object.
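    // Editorial sketch (an assumption, not from this change) of the hazard described in point 1 above:
    //     struct Resource { std::thread worker; ~Resource() { worker.join(); } };
    // If such a resource is still registered when static destruction runs after an abrupt exit on
    // Windows, its worker thread has already been terminated, so join() can block forever - hence the
    // leak-instead-of-destroy behavior of the destructor below.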
~SharedResourceManager() { for (auto &resource : m_resources) { @@ -113,7 +117,6 @@ class SharedResourceManager resource.release(); } } -#endif /* _WIN32 */ static uint32_t max_resources() { diff --git a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp index e5953f34..fff74272 100644 --- a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp +++ b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp @@ -116,18 +116,11 @@ Expected PartialClusterReader::get_partial_clusters_layout_bitmap(hail // If file does not exist - get default values for dev_arch if (!Filesystem::does_file_exists(std::string(PARTIAL_CLUSTER_READER_CLUSTER_LAYOUT_FILE_PATH))) { LOGGER__INFO("partial cluster layout bitmap file not found, Enabling all clusters by default"); - auto default_bitmap_exp = get_arch_default_bitmap(dev_arch); - CHECK_EXPECTED(default_bitmap_exp); - fuse_file_data.first = default_bitmap_exp.release(); - - auto sku_value_exp = get_sku_value_from_arch(dev_arch); - CHECK_EXPECTED(sku_value_exp); - fuse_file_data.second = sku_value_exp.release(); + TRY(fuse_file_data.first, get_arch_default_bitmap(dev_arch)); + TRY(fuse_file_data.second, get_sku_value_from_arch(dev_arch)); } else { // This will read bitmap and verify with SKU value - auto fuse_file_exp = read_fuse_file(); - CHECK_EXPECTED(fuse_file_exp); - fuse_file_data = fuse_file_exp.release(); + TRY(fuse_file_data, read_fuse_file()); } const auto sku_value = fuse_file_data.second; @@ -155,10 +148,7 @@ Expected PartialClusterReader::get_actual_dev_arch_ && (HAILO_ARCH_HAILO15H == fw_dev_arch)) { return HAILO_ARCH_HAILO15H; } else { - auto fuse_file_exp = read_fuse_file(); - CHECK_EXPECTED(fuse_file_exp); - const auto fuse_file_data = fuse_file_exp.release(); - + TRY(const auto fuse_file_data, read_fuse_file()); const auto sku_value = fuse_file_data.second; if (HAILO15M_SKU_VALUE == sku_value) { return HAILO_ARCH_HAILO15M; diff --git a/hailort/libhailort/src/utils/thread_safe_queue.hpp b/hailort/libhailort/src/utils/thread_safe_queue.hpp index f7dfe6f0..cd244485 100644 --- a/hailort/libhailort/src/utils/thread_safe_queue.hpp +++ b/hailort/libhailort/src/utils/thread_safe_queue.hpp @@ -137,13 +137,13 @@ class SpscQueue // +1 for each dequeued item // -1 for each enqueued item // Blocks when the queue is full (which happens when it's value reaches zero, hence it starts at queue size) - const auto items_enqueued_sema = Semaphore::create_shared(0); - CHECK_AS_EXPECTED(nullptr != items_enqueued_sema, HAILO_OUT_OF_HOST_MEMORY, "Failed creating items_enqueued_sema semaphore"); + auto items_enqueued_sema = Semaphore::create_shared(0); + CHECK_EXPECTED(items_enqueued_sema, "Failed creating items_enqueued_sema semaphore"); - const auto items_dequeued_sema = Semaphore::create_shared(static_cast(max_size)); - CHECK_AS_EXPECTED(nullptr != items_dequeued_sema, HAILO_OUT_OF_HOST_MEMORY, "Failed creating items_dequeued_sema semaphore"); + auto items_dequeued_sema = Semaphore::create_shared(static_cast(max_size)); + CHECK_EXPECTED(items_dequeued_sema, "Failed creating items_dequeued_sema semaphore"); - return SpscQueue(max_size, items_enqueued_sema, items_dequeued_sema, shutdown_event, default_timeout); + return SpscQueue(max_size, items_enqueued_sema.release(), items_dequeued_sema.release(), shutdown_event, default_timeout); } static std::shared_ptr create_shared(size_t max_size, const EventPtr& shutdown_event, @@ -210,9 +210,15 @@ class SpscQueue return 
dequeue(m_default_timeout); } - hailo_status enqueue(const T& result, std::chrono::milliseconds timeout) AE_NO_TSAN + hailo_status enqueue(const T& result, std::chrono::milliseconds timeout, bool ignore_shutdown_event = false) AE_NO_TSAN { - const auto wait_result = m_items_dequeued_sema_or_shutdown.wait(timeout); + hailo_status wait_result = HAILO_UNINITIALIZED; + if (ignore_shutdown_event) { + wait_result = m_items_dequeued_sema->wait(timeout); + } else { + wait_result = m_items_dequeued_sema_or_shutdown.wait(timeout); + } + if (HAILO_SHUTDOWN_EVENT_SIGNALED == wait_result) { LOGGER__TRACE("Shutdown event has been signaled"); return wait_result; @@ -234,9 +240,9 @@ class SpscQueue return m_items_enqueued_sema_or_shutdown.signal(); } - inline hailo_status enqueue(const T& result) AE_NO_TSAN + inline hailo_status enqueue(const T& result, bool ignore_shutdown_event = false) AE_NO_TSAN { - return enqueue(result, m_default_timeout); + return enqueue(result, m_default_timeout, ignore_shutdown_event); } // TODO: Do away with two copies of this function? (SDK-16481) diff --git a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp index a2d0eaa1..1f2a7b0e 100644 --- a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp @@ -69,7 +69,7 @@ hailo_status InferRequestAccumulator::shutdown(std::chrono::milliseconds timeout // Now cancel all partial request for (auto &partial_request : m_partial_infer_requests) { for (auto &stream_transfer_request : partial_request) { - stream_transfer_request.second.callback(HAILO_STREAM_ABORTED_BY_USER); + stream_transfer_request.second.callback(HAILO_STREAM_ABORT); } } m_partial_infer_requests.clear(); diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp index e11272f0..08b99913 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp @@ -32,7 +32,7 @@ using core_op_priority_t = uint8_t; constexpr const uint16_t SINGLE_CONTEXT_BATCH_SIZE = 1; class VDeviceCoreOp; - +class VdmaConfigCoreOp; class ScheduledCoreOp { diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp index a745f9d4..1c71752b 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp @@ -21,6 +21,7 @@ namespace hailort /** Input stream **/ Expected> ScheduledInputStream::create( + VDevice &vdevice, std::map> &&streams, const LayerInfo &layer_info, const scheduler_core_op_handle_t &core_op_handle, @@ -35,7 +36,7 @@ Expected> ScheduledInputStream::create( } auto status = HAILO_UNINITIALIZED; - auto local_vdevice_stream = make_unique_nothrow(std::move(streams), core_op_handle, + auto local_vdevice_stream = make_unique_nothrow(vdevice, std::move(streams), core_op_handle, std::move(core_op_activated_event), layer_info, std::move(infer_requests_accumulator), status); CHECK_NOT_NULL_AS_EXPECTED(local_vdevice_stream, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -51,10 +52,12 @@ hailo_stream_interface_t ScheduledInputStream::get_interface() const Expected> ScheduledInputStream::allocate_buffer_pool() { - auto queued_pool = 
QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), - BufferStorageParams::create_dma()); - CHECK_EXPECTED(queued_pool); - return std::unique_ptr(queued_pool.release()); + TRY(auto queued_pool, QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), + BufferStorageParams::create_dma())); + + CHECK_SUCCESS(queued_pool->dma_map(m_vdevice, HAILO_DMA_BUFFER_DIRECTION_H2D)); + + return std::unique_ptr(std::move(queued_pool)); } size_t ScheduledInputStream::get_max_ongoing_transfers() const @@ -81,6 +84,7 @@ hailo_status ScheduledInputStream::write_async_impl(TransferRequest &&transfer_r /** Output stream **/ Expected> ScheduledOutputStream::create( + VDevice &vdevice, std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, @@ -96,7 +100,7 @@ Expected> ScheduledOutputStream::create( auto status = HAILO_UNINITIALIZED; - auto stream = make_unique_nothrow(std::move(streams), core_op_handle, + auto stream = make_unique_nothrow(vdevice, std::move(streams), core_op_handle, layer_info, std::move(core_op_activated_event), std::move(infer_requests_accumulator), status); CHECK_NOT_NULL_AS_EXPECTED(stream, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -112,10 +116,12 @@ hailo_stream_interface_t ScheduledOutputStream::get_interface() const Expected> ScheduledOutputStream::allocate_buffer_pool() { - auto queued_pool = QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), - BufferStorageParams::create_dma()); - CHECK_EXPECTED(queued_pool); - return std::unique_ptr(queued_pool.release()); + TRY(auto queued_pool, QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), + BufferStorageParams::create_dma())); + + CHECK_SUCCESS(queued_pool->dma_map(m_vdevice, HAILO_DMA_BUFFER_DIRECTION_D2H)); + + return std::unique_ptr(std::move(queued_pool)); } size_t ScheduledOutputStream::get_max_ongoing_transfers() const diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp index e96ddf0e..5666481b 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp @@ -31,6 +31,7 @@ class ScheduledInputStream : public AsyncInputStreamBase { public: static Expected> create( + VDevice &vdevice, std::map> &&streams, const LayerInfo &layer_info, const scheduler_core_op_handle_t &core_op_handle, @@ -38,6 +39,7 @@ class ScheduledInputStream : public AsyncInputStreamBase { std::shared_ptr infer_requests_accumulator); ScheduledInputStream( + VDevice &vdevice, std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, EventPtr &&core_op_activated_event, @@ -45,6 +47,7 @@ class ScheduledInputStream : public AsyncInputStreamBase { std::shared_ptr &&infer_requests_accumulator, hailo_status &status) : AsyncInputStreamBase(layer_info, std::move(core_op_activated_event), status), + m_vdevice(vdevice), m_streams(std::move(streams)), m_core_op_handle(core_op_handle), m_infer_requests_accumulator(infer_requests_accumulator), @@ -61,6 +64,7 @@ class ScheduledInputStream : public AsyncInputStreamBase { virtual bool is_scheduled() override final { return true; }; private: + VDevice &m_vdevice; std::map> m_streams; scheduler_core_op_handle_t m_core_op_handle; std::shared_ptr m_infer_requests_accumulator; @@ -71,6 +75,7 @@ class ScheduledInputStream : public 
AsyncInputStreamBase { class ScheduledOutputStream : public AsyncOutputStreamBase { public: static Expected> create( + VDevice &vdevice, std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, @@ -78,6 +83,7 @@ class ScheduledOutputStream : public AsyncOutputStreamBase { std::shared_ptr infer_requests_accumulator); ScheduledOutputStream( + VDevice &vdevice, std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, @@ -85,6 +91,7 @@ class ScheduledOutputStream : public AsyncOutputStreamBase { std::shared_ptr &&infer_requests_accumulator, hailo_status &status) : AsyncOutputStreamBase(layer_info, std::move(core_op_activated_event), status), + m_vdevice(vdevice), m_streams(std::move(streams)), m_core_op_handle(core_op_handle), m_infer_requests_accumulator(infer_requests_accumulator), @@ -121,6 +128,7 @@ class ScheduledOutputStream : public AsyncOutputStreamBase { private: + VDevice &m_vdevice; std::map> m_streams; scheduler_core_op_handle_t m_core_op_handle; std::shared_ptr m_infer_requests_accumulator; diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp index a8163bd6..060d3b23 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp @@ -14,7 +14,6 @@ #include "vdevice/vdevice_core_op.hpp" #include "vdevice/scheduler/scheduler_oracle.hpp" #include "vdma/vdma_config_manager.hpp" -#include "hef/hef_internal.hpp" #include @@ -76,6 +75,7 @@ void CoreOpsScheduler::remove_core_op(scheduler_core_op_handle_t core_op_handle) { std::unique_lock lock(m_scheduler_mutex); m_scheduled_core_ops.at(core_op_handle)->remove_instance(); + m_scheduler_thread.signal(); } void CoreOpsScheduler::shutdown() @@ -124,8 +124,7 @@ hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t & current_core_op = get_vdma_core_op(curr_device_info->current_core_op_handle, device_id); } - const bool is_batch_switch = (core_op_handle == curr_device_info->current_core_op_handle); - auto status = VdmaConfigManager::switch_core_op(current_core_op, next_core_op, hw_batch_size, is_batch_switch); + auto status = VdmaConfigManager::set_core_op(device_id, current_core_op, next_core_op, hw_batch_size); CHECK_SUCCESS(status, "Failed switching core-op"); } @@ -362,9 +361,13 @@ void CoreOpsScheduler::shutdown_core_op(scheduler_core_op_handle_t core_op_handl auto request = dequeue_infer_request(core_op_handle); assert(request); for (auto &transfer : request->transfers) { - transfer.second.callback(HAILO_STREAM_ABORTED_BY_USER); + transfer.second.callback(HAILO_STREAM_ABORT); } - request->callback(HAILO_STREAM_ABORTED_BY_USER); + + // Before calling infer_callback, we must ensure all stream callbacks were called and released (since the + // user may capture some variables in the callbacks). 
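The ordering constraint in the comment above matters because per-stream callbacks may capture user state (buffers, shared pointers), and the request-level callback is the user's signal that this state is free to reuse. A small self-contained illustration of the shutdown ordering, with hypothetical stand-in types rather than the scheduler's real ones:

#include <cassert>
#include <functional>
#include <memory>
#include <vector>

struct FakeInferRequest {
    std::vector<std::function<void(int)>> transfers; // per-stream callbacks
    std::function<void(int)> callback;               // request-level callback
};

int main()
{
    auto frame = std::make_shared<std::vector<unsigned char>>(1024);

    FakeInferRequest request;
    request.transfers.push_back([frame](int /*status*/) { /* capture keeps `frame` alive */ });
    request.callback = [&request](int /*status*/) {
        // The user may recycle the frame buffer here; that is only safe because
        // the per-stream callbacks (and their captures) are already gone.
        assert(request.transfers.empty());
    };
    frame.reset(); // the user's own reference is long gone by shutdown time

    const int abort_status = -1; // stands in for HAILO_STREAM_ABORT
    for (auto &transfer : request.transfers) {
        transfer(abort_status);  // call every stream callback first...
    }
    request.transfers.clear();   // ...then drop their captured state...
    request.callback(abort_status); // ...and only then fire the final callback
    return 0;
}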
+ request->transfers.clear(); + request->callback(HAILO_STREAM_ABORT); } } @@ -375,7 +378,7 @@ void CoreOpsScheduler::schedule() for (auto &core_op_pair : m_scheduled_core_ops) { auto status = optimize_streaming_if_enabled(core_op_pair.first); if ((HAILO_SUCCESS != status) && - (HAILO_STREAM_ABORTED_BY_USER != status)) { + (HAILO_STREAM_ABORT != status)) { LOGGER__ERROR("optimize_streaming_if_enabled thread failed with status={}", status); } }; diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp index ebcdf092..3d252205 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp @@ -17,6 +17,7 @@ #include "common/filesystem.hpp" #include "utils/thread_safe_map.hpp" +#include "utils/thread_safe_queue.hpp" #include "vdevice/scheduler/scheduled_core_op_state.hpp" #include "vdevice/scheduler/scheduler_base.hpp" diff --git a/hailort/libhailort/src/vdevice/vdevice.cpp b/hailort/libhailort/src/vdevice/vdevice.cpp index f6bc795e..c51db77e 100644 --- a/hailort/libhailort/src/vdevice/vdevice.cpp +++ b/hailort/libhailort/src/vdevice/vdevice.cpp @@ -24,6 +24,7 @@ #include "network_group/network_group_internal.hpp" #include "net_flow/pipeline/infer_model_internal.hpp" #include "core_op/core_op.hpp" +#include "hef/hef_internal.hpp" #ifdef HAILO_SUPPORT_MULTI_PROCESS #include "service/rpc_client_utils.hpp" @@ -101,21 +102,6 @@ Expected VDevice::create_configure_params(Hef &hef, cons return hef.create_configure_params(stream_interface.release(), network_group_name); } -hailo_status VDevice::dma_map(void *address, size_t size, hailo_stream_direction_t direction) -{ - (void) address; - (void) size; - (void) direction; - return HAILO_NOT_IMPLEMENTED; -} - -hailo_status VDevice::dma_unmap(void *address, hailo_stream_direction_t direction) -{ - (void) address; - (void) direction; - return HAILO_NOT_IMPLEMENTED; -} - hailo_status VDevice::before_fork() { return HAILO_SUCCESS; @@ -198,13 +184,32 @@ Expected VDeviceHandle::get_default_streams_interface( return vdevice.value()->get_default_streams_interface(); } -Expected> VDeviceHandle::create_infer_model(const std::string &hef_path) +Expected> VDeviceHandle::create_infer_model(const std::string &hef_path, + const std::string &network_name) { auto &manager = SharedResourceManager::get_instance(); auto vdevice = manager.resource_lookup(m_handle); CHECK_EXPECTED(vdevice); - return vdevice.value()->create_infer_model(hef_path); + return vdevice.value()->create_infer_model(hef_path, network_name); +} + +hailo_status VDeviceHandle::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + auto &manager = SharedResourceManager::get_instance(); + auto vdevice = manager.resource_lookup(m_handle); + CHECK_EXPECTED_AS_STATUS(vdevice); + + return vdevice.value()->dma_map(address, size, direction); +} + +hailo_status VDeviceHandle::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + auto &manager = SharedResourceManager::get_instance(); + auto vdevice = manager.resource_lookup(m_handle); + CHECK_EXPECTED_AS_STATUS(vdevice); + + return vdevice.value()->dma_unmap(address, size, direction); } bool VDevice::service_over_ip_mode() @@ -382,8 +387,18 @@ hailo_status VDeviceClient::listener_run_in_thread(VDeviceIdentifier identifier) while (m_is_listener_thread_running) { auto callback_id = client->VDevice_get_callback_id(identifier); - if (callback_id.status() == 
HAILO_SHUTDOWN_EVENT_SIGNALED) { - LOGGER__INFO("Shutdown event was signaled in listener_run_in_thread"); + if (HAILO_SUCCESS != callback_id.status()) { + std::unique_lock lock(m_mutex); + for (auto &ng_ptr_pair : m_network_groups) { + ng_ptr_pair.second->execute_callbacks_on_error(callback_id.status()); + } + if (callback_id.status() == HAILO_SHUTDOWN_EVENT_SIGNALED) { + LOGGER__INFO("Shutdown event was signaled in listener_run_in_thread"); + } else if (callback_id.status() == HAILO_RPC_FAILED) { + LOGGER__ERROR("Lost communication with the service.."); + } else { + LOGGER__ERROR("Failed to get callback_id from listener thread with {}", callback_id.status()); + } break; } CHECK_EXPECTED_AS_STATUS(callback_id); @@ -413,9 +428,8 @@ hailo_status VDeviceClient::finish_listener_thread() Expected>> VDeviceClient::get_physical_devices() const { + // In case of service-over-ip, the returned list will be empty std::vector> devices_refs; - CHECK_AS_EXPECTED(0 < m_devices.size(), HAILO_INVALID_OPERATION, "get_physical_devices() usage is invalid when working with service over IP. In order to use a local service, unset env var {}", HAILORT_SERVICE_ADDRESS_ENV_VAR); - for (auto &device : m_devices) { devices_refs.push_back(*device); } @@ -433,6 +447,26 @@ Expected VDeviceClient::get_default_streams_interface( return m_client->VDevice_get_default_streams_interface(m_identifier); } +hailo_status VDeviceClient::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + (void) address; + (void) size; + (void) direction; + // It is ok to do nothing on service, because the buffer is copied anyway to the service. + LOGGER__TRACE("VDevice `dma_map()` is doing nothing on service"); + return HAILO_SUCCESS; +} + +hailo_status VDeviceClient::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + (void) address; + (void) size; + (void) direction; + // It is ok to do nothing on service, because the buffer is copied anyway to the service. 
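For local (non-service) vdevices these overrides do real work: VDeviceBase::dma_map below forwards the mapping to every physical device, so repeated async transfers on the same user buffer skip the per-transfer mapping cost. A sketch of the intended calling pattern, using only the signatures added in this diff (not a complete program, since it needs a configured vdevice to actually run transfers):

hailo_status run_with_premapped_input(hailort::VDevice &vdevice, void *buffer, size_t size)
{
    // Map once, up front.
    auto status = vdevice.dma_map(buffer, size, HAILO_DMA_BUFFER_DIRECTION_H2D);
    if (HAILO_SUCCESS != status) {
        return status;
    }

    // ... launch any number of async inferences that read from `buffer` ...

    // Unmap with the same address/size/direction triple used for mapping.
    return vdevice.dma_unmap(buffer, size, HAILO_DMA_BUFFER_DIRECTION_H2D);
}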
+ LOGGER__TRACE("VDevice `dma_map()` is doing nothing on service"); + return HAILO_SUCCESS; +} + #endif // HAILO_SUPPORT_MULTI_PROCESS @@ -504,7 +538,9 @@ hailo_status VDeviceBase::validate_params(const hailo_vdevice_params_t ¶ms) Expected> VDeviceBase::create(const hailo_vdevice_params_t ¶ms) { TRACE(InitProfilerProtoTrace); - TRACE(MonitorStartTrace); + auto unique_vdevice_hash = std::to_string(std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count()); + TRACE(MonitorStartTrace, unique_vdevice_hash); auto devices_expected = create_devices(params); CHECK_EXPECTED(devices_expected); @@ -541,7 +577,7 @@ Expected> VDeviceBase::create(const hailo_vdevice_p } } - auto vdevice = std::unique_ptr(new (std::nothrow) VDeviceBase(std::move(devices), scheduler_ptr)); + auto vdevice = std::unique_ptr(new (std::nothrow) VDeviceBase(std::move(devices), scheduler_ptr, unique_vdevice_hash)); CHECK_AS_EXPECTED(nullptr != vdevice, HAILO_OUT_OF_HOST_MEMORY); return vdevice; @@ -559,6 +595,7 @@ VDeviceBase::~VDeviceBase() m_core_ops_scheduler->shutdown(); } TRACE(DumpProfilerStateTrace); + TRACE(MonitorEndTrace, m_unique_vdevice_hash); } Expected VDeviceBase::configure(Hef &hef, @@ -630,8 +667,10 @@ Expected VDeviceBase::configure(Hef &hef, return added_network_groups; } -Expected> VDevice::create_infer_model(const std::string &hef_path) +Expected> VDevice::create_infer_model(const std::string &hef_path, const std::string &network_name) { + CHECK_AS_EXPECTED(network_name.empty(), HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!"); + auto hef_expected = Hef::create(hef_path); CHECK_EXPECTED(hef_expected); auto hef = hef_expected.release(); @@ -830,7 +869,7 @@ Expected> VDeviceBase::create_vdevice_core_op(Hef auto core_op_handle = allocate_core_op_handle(); - return VDeviceCoreOp::create(m_active_core_op_holder, params.second, physical_core_ops, + return VDeviceCoreOp::create(*this, m_active_core_op_holder, params.second, physical_core_ops, m_core_ops_scheduler, core_op_handle, hef.hash()); } diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp index 153a8850..d0ca5831 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp @@ -18,7 +18,8 @@ namespace hailort { -Expected> VDeviceCoreOp::create(ActiveCoreOpHolder &active_core_op_holder, +Expected> VDeviceCoreOp::create(VDevice &vdevice, + ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, vdevice_core_op_handle_t core_op_handle, @@ -38,11 +39,17 @@ Expected> VDeviceCoreOp::create(ActiveCoreOpHolde } // On HcpConfigCoreOp, we don't support get_async_max_queue_size (and the core op doesn't use the queue). - auto per_device_queue_size = core_ops.begin()->second->get_async_max_queue_size(); - const auto queue_size = per_device_queue_size ? 
(*per_device_queue_size * core_ops.size()) : 0; + size_t queue_size = 0; + auto iface = core_ops.begin()->second->get_default_streams_interface(); + CHECK_EXPECTED(iface); + if ((iface.value() != HAILO_STREAM_INTERFACE_ETH) && (iface.value() != HAILO_STREAM_INTERFACE_MIPI)) { + auto per_device_queue_size = core_ops.begin()->second->get_async_max_queue_size(); + CHECK_EXPECTED(per_device_queue_size); + queue_size = *per_device_queue_size * core_ops.size(); + } auto status = HAILO_UNINITIALIZED; - auto vdevice_core_op = make_shared_nothrow(active_core_op_holder, configure_params, + auto vdevice_core_op = make_shared_nothrow(vdevice, active_core_op_holder, configure_params, std::move(core_ops), core_ops_scheduler, core_op_handle, hef_hash, queue_size, status); CHECK_NOT_NULL_AS_EXPECTED(vdevice_core_op, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -67,8 +74,8 @@ Expected> VDeviceCoreOp::duplicate(std::shared_pt other->m_infer_requests_accumulator->queue_size() : 0; auto status = HAILO_UNINITIALIZED; - auto vdevice_core_op = make_shared_nothrow(other->m_active_core_op_holder, configure_params, - std::move(copy), other->m_core_ops_scheduler, other->m_core_op_handle, + auto vdevice_core_op = make_shared_nothrow(other->m_vdevice, other->m_active_core_op_holder, + configure_params, std::move(copy), other->m_core_ops_scheduler, other->m_core_op_handle, other->m_hef_hash, queue_size, status); CHECK_NOT_NULL_AS_EXPECTED(vdevice_core_op, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -79,13 +86,15 @@ Expected> VDeviceCoreOp::duplicate(std::shared_pt return vdevice_core_op; } -VDeviceCoreOp::VDeviceCoreOp(ActiveCoreOpHolder &active_core_op_holder, +VDeviceCoreOp::VDeviceCoreOp(VDevice &vdevice, + ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, vdevice_core_op_handle_t core_op_handle, const std::string &hef_hash, size_t max_queue_size, hailo_status &status) : CoreOp(configure_params, core_ops.begin()->second->m_metadata, active_core_op_holder, status), + m_vdevice(vdevice), m_core_ops(std::move(core_ops)), m_core_ops_scheduler(core_ops_scheduler), m_core_op_handle(core_op_handle), @@ -191,7 +200,7 @@ hailo_status VDeviceCoreOp::create_input_vdevice_stream_from_config_params(const if (m_core_ops_scheduler.lock()) { assert(m_infer_requests_accumulator); - auto scheduled_stream = ScheduledInputStream::create(std::move(low_level_streams), + auto scheduled_stream = ScheduledInputStream::create(m_vdevice, std::move(low_level_streams), edge_layer.value(), m_core_op_handle, m_core_op_activated_event, m_infer_requests_accumulator); CHECK_EXPECTED_AS_STATUS(scheduled_stream); @@ -232,7 +241,7 @@ hailo_status VDeviceCoreOp::create_output_vdevice_stream_from_config_params(cons if (m_core_ops_scheduler.lock()) { assert(m_infer_requests_accumulator); - auto scheduled_stream = ScheduledOutputStream::create(std::move(low_level_streams), + auto scheduled_stream = ScheduledOutputStream::create(m_vdevice, std::move(low_level_streams), m_core_op_handle, edge_layer.value(), m_core_op_activated_event, m_infer_requests_accumulator); CHECK_EXPECTED_AS_STATUS(scheduled_stream); diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp index b9c3af62..94828804 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp @@ -15,6 +15,7 @@ #include 
"common/utils.hpp" #include "hailo/network_group.hpp" #include "hailo/vstream.hpp" +#include "hailo/vdevice.hpp" #include "vdevice/scheduler/scheduler.hpp" #include "vdevice/scheduler/infer_request_accumulator.hpp" @@ -31,6 +32,7 @@ class VDeviceCoreOp : public CoreOp { public: static Expected> create( + VDevice &vdevice, ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, @@ -44,7 +46,6 @@ class VDeviceCoreOp : public CoreOp VDeviceCoreOp(const VDeviceCoreOp &other) = delete; VDeviceCoreOp &operator=(const VDeviceCoreOp &other) = delete; VDeviceCoreOp &operator=(VDeviceCoreOp &&other) = delete; - VDeviceCoreOp(VDeviceCoreOp &&other) = default; bool equals(const Hef &hef, const std::pair ¶ms_pair) { @@ -92,7 +93,8 @@ class VDeviceCoreOp : public CoreOp virtual Expected run_hw_infer_estimator() override; virtual Expected get_intermediate_buffer(const IntermediateBufferKey &) override; - VDeviceCoreOp(ActiveCoreOpHolder &active_core_op_holder, + VDeviceCoreOp(VDevice &vdevice, + ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, scheduler_core_op_handle_t core_op_handle, @@ -111,6 +113,7 @@ class VDeviceCoreOp : public CoreOp hailo_status add_to_trace(); + VDevice &m_vdevice; std::map> m_core_ops; CoreOpsSchedulerWeakPtr m_core_ops_scheduler; const vdevice_core_op_handle_t m_core_op_handle; diff --git a/hailort/libhailort/src/vdevice/vdevice_internal.hpp b/hailort/libhailort/src/vdevice/vdevice_internal.hpp index 071d7949..5920b90c 100644 --- a/hailort/libhailort/src/vdevice/vdevice_internal.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_internal.hpp @@ -82,7 +82,7 @@ class VDeviceBase : public VDevice // Currently only homogeneous vDevice is allow (= all devices are from the same type) virtual Expected get_default_streams_interface() const override; - virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction) override + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override { for (const auto &pair : m_devices) { auto &device = pair.second; @@ -92,13 +92,13 @@ class VDeviceBase : public VDevice return HAILO_SUCCESS; } - virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction) override + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override { hailo_status status = HAILO_SUCCESS; for (const auto &pair : m_devices) { auto &device = pair.second; // Best effort, propagate first error - const auto unmap_status = device->dma_unmap(address, direction); + const auto unmap_status = device->dma_unmap(address, size, direction); if (HAILO_SUCCESS != unmap_status) { LOGGER__ERROR("Failed unmapping user buffer {} with status {}", address, unmap_status); if (HAILO_SUCCESS == status) { @@ -113,8 +113,9 @@ class VDeviceBase : public VDevice static hailo_status validate_params(const hailo_vdevice_params_t ¶ms); private: - VDeviceBase(std::map> &&devices, CoreOpsSchedulerPtr core_ops_scheduler) : - m_devices(std::move(devices)), m_core_ops_scheduler(core_ops_scheduler), m_next_core_op_handle(0) + VDeviceBase(std::map> &&devices, CoreOpsSchedulerPtr core_ops_scheduler, + const std::string &unique_vdevice_hash="") : + m_devices(std::move(devices)), m_core_ops_scheduler(core_ops_scheduler), m_next_core_op_handle(0), m_unique_vdevice_hash(unique_vdevice_hash) {} static 
Expected>> create_devices(const hailo_vdevice_params_t &params); @@ -133,6 +134,7 @@ class VDeviceBase std::vector> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context ActiveCoreOpHolder m_active_core_op_holder; vdevice_core_op_handle_t m_next_core_op_handle; + const std::string m_unique_vdevice_hash; // Used to identify this vdevice in the monitor. Consider removing - TODO (HRT-8835) std::mutex m_mutex; }; @@ -161,6 +163,8 @@ class VDeviceClient : public VDevice virtual hailo_status before_fork() override; virtual hailo_status after_fork_in_parent() override; virtual hailo_status after_fork_in_child() override; + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; private: VDeviceClient(std::unique_ptr client, VDeviceIdentifier &&identifier, std::vector> &&devices); @@ -201,7 +205,10 @@ class VDeviceHandle : public VDevice Expected>> get_physical_devices() const override; Expected> get_physical_devices_ids() const override; Expected get_default_streams_interface() const override; - Expected> create_infer_model(const std::string &hef_path) override; + Expected> create_infer_model(const std::string &hef_path, + const std::string &network_name = "") override; + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; private: VDeviceHandle(uint32_t handle); diff --git a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp index 39ab8a38..0cfa0b88 100644 --- a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp @@ -22,7 +22,11 @@ Expected> VDeviceNativeInputStream::cr vdevice_core_op_handle_t core_op_handle) { std::unique_ptr reorder_queue = nullptr; - if (auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size()) { + // Interfaces of all streams should be the same + auto iface = streams.begin()->second.get().get_interface(); + if ((iface != HAILO_STREAM_INTERFACE_ETH) && (iface != HAILO_STREAM_INTERFACE_MIPI)) { + auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size(); + CHECK_EXPECTED(max_queue_size_per_stream); const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); reorder_queue = make_unique_nothrow(max_queue_size); CHECK_NOT_NULL_AS_EXPECTED(reorder_queue, HAILO_OUT_OF_HOST_MEMORY); @@ -134,7 +138,7 @@ hailo_status VDeviceNativeInputStream::write_impl(const MemoryView &buffer) TRACE(FrameEnqueueH2DTrace, m_core_op_handle, name()); auto status = next_stream().write_impl(buffer); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ LOGGER__INFO("Failed write to stream {} (device: {}) with status={}", name(), m_next_transfer_stream, status); return status; } @@ -160,7 +164,9 @@ Expected VDeviceNativeInputStream::get_async_max_queue_size() const // transfers.
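The queue-size computation in get_async_max_queue_size (continued below) floors the per-stream queue to whole batches before scaling. A worked example of that arithmetic; the numbers are hypothetical, and the final multiply-back is an assumption about the part of the function that falls outside this hunk:

#include <cstddef>

constexpr size_t max_queue_per_stream = 10; // hypothetical per-stream queue depth
constexpr size_t batch_size = 4;
constexpr size_t batch_count_queued = max_queue_per_stream / batch_size; // 2 whole batches
constexpr size_t usable_queue = batch_count_queued * batch_size;         // 8 slots (assumed tail logic)
static_assert(batch_count_queued == 2 && usable_queue == 8, "flooring drops the partial batch");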
auto &first_stream = m_streams.begin()->second.get(); const auto max_queue_per_stream = first_stream.get_async_max_queue_size(); - CHECK_EXPECTED(max_queue_per_stream); + if (!max_queue_per_stream) { + return make_unexpected(max_queue_per_stream.status()); // Not all streams have max_queue_size (e.g. eth), so it's not necessarily an error + } if (*max_queue_per_stream >= m_batch_size) { const auto batch_count_queued = *max_queue_per_stream / m_batch_size; @@ -221,7 +227,11 @@ Expected> VDeviceNativeOutputStream:: vdevice_core_op_handle_t core_op_handle) { std::unique_ptr reorder_queue = nullptr; - if (auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size()) { + // Interfaces of all streams should be the same + auto iface = streams.begin()->second.get().get_interface(); + if ((iface != HAILO_STREAM_INTERFACE_ETH) && (iface != HAILO_STREAM_INTERFACE_MIPI)) { + auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size(); + CHECK_EXPECTED(max_queue_size_per_stream); const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); reorder_queue = make_unique_nothrow(max_queue_size); CHECK_NOT_NULL_AS_EXPECTED(reorder_queue, HAILO_OUT_OF_HOST_MEMORY); @@ -317,7 +327,7 @@ hailo_stream_interface_t VDeviceNativeOutputStream::get_interface() const hailo_status VDeviceNativeOutputStream::read_impl(MemoryView buffer) { auto status = next_stream().read_impl(buffer); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ LOGGER__INFO("Failed read from stream {} (device: {})", status, m_next_transfer_stream); return status; } @@ -347,7 +357,9 @@ Expected VDeviceNativeOutputStream::get_async_max_queue_size() const // transfers. auto &first_stream = m_streams.begin()->second.get(); const auto max_queue_per_stream = first_stream.get_async_max_queue_size(); - CHECK_EXPECTED(max_queue_per_stream); + if (!max_queue_per_stream) { + return make_unexpected(max_queue_per_stream.status()); // Not all streams have max_queue_size (e.g. 
eth), so it's not necessarily an error + } if (*max_queue_per_stream >= m_batch_size) { const auto batch_count_queued = *max_queue_per_stream / m_batch_size; diff --git a/hailort/libhailort/src/vdma/CMakeLists.txt b/hailort/libhailort/src/vdma/CMakeLists.txt index aed185a3..5641ff67 100644 --- a/hailort/libhailort/src/vdma/CMakeLists.txt +++ b/hailort/libhailort/src/vdma/CMakeLists.txt @@ -1,5 +1,22 @@ cmake_minimum_required(VERSION 3.0.0) +if(WIN32) + set(DRIVER_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/driver/os/windows") +elseif(UNIX) + if (CMAKE_SYSTEM_NAME STREQUAL QNX) + set(DRIVER_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/driver/os/posix/qnx") + else() + set(DRIVER_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/driver/os/posix/linux") + endif() +else() + message(FATAL_ERROR "Unexpected platform target, stopping build") +endif() + +set(DRIVER_SRC_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/driver/hailort_driver.cpp + ${DRIVER_OS_DIR}/driver_os_specific.cpp +) + set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/vdma_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vdma_config_core_op.cpp @@ -7,20 +24,30 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/vdma_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/circular_stream_buffer_pool.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dma_mapped_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pcie/pcie_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/integrated/integrated_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/channel/boundary_channel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/channel/interrupts_dispatcher.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/channel/transfer_launcher.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/descriptor_list.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_edge_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapped_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/dma_able_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapping_manager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_edge_layer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_edge_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/buffer_requirements.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_buffer.cpp + + ${DRIVER_SRC_FILES} ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) + +# Export DRIVER_SRC_FILES as HAILO_DRIVER_SRC_FILES to parent scope +set(HAILO_DRIVER_SRC_FILES ${DRIVER_SRC_FILES} PARENT_SCOPE) \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp index 32873666..8c331051 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp +++ b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp @@ -12,7 +12,7 @@ #include "common/os_utils.hpp" #include "vdma/channel/boundary_channel.hpp" -#include "vdma/memory/vdma_buffer.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" #include #include @@ -25,31 +25,29 @@ namespace vdma { Expected BoundaryChannel::create(vdma::ChannelId channel_id, Direction direction, - VdmaDevice &vdma_device, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, + HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter) { hailo_status status = HAILO_UNINITIALIZED; - auto channel_ptr = make_shared_nothrow(channel_id, direction, vdma_device, 
descs_count, + auto channel_ptr = make_shared_nothrow(channel_id, direction, driver, descs_count, desc_page_size, stream_name, latency_meter, status); CHECK_NOT_NULL_AS_EXPECTED(channel_ptr, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating BoundaryChannel"); return channel_ptr; } -BoundaryChannel::BoundaryChannel(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, +BoundaryChannel::BoundaryChannel(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter, hailo_status &status) : m_channel_id(channel_id), m_direction(direction), - m_vdma_device(vdma_device), - m_driver(vdma_device.get_driver()), - m_host_registers(vdma_device.get_driver(), channel_id, direction), + m_driver(driver), m_desc_list(nullptr), m_stream_name(stream_name), - m_latency_meter(latency_meter), m_is_channel_activated(false), m_ongoing_transfers((latency_meter != nullptr) ? ONGOING_TRANSFERS_SIZE/2 : ONGOING_TRANSFERS_SIZE), - m_last_bounded_buffer(BoundedBuffer{nullptr, 0, 0}) + m_latency_meter(latency_meter), + m_pending_latency_measurements(ONGOING_TRANSFERS_SIZE) // Make sure there will always be room for latency measurements { if (Direction::BOTH == direction) { LOGGER__ERROR("Boundary channels must be unidirectional"); @@ -91,28 +89,13 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process return HAILO_STREAM_NOT_ACTIVATED; } - // Although the hw_num_processed should be a number between 0 and m_descs.size-1, if m_desc.size < 0x10000 - // (the maximum desc size), the actual hw_num_processed is a number between 1 and m_descs.size. Therefore the - // value can be m_descs.size, in this case we change it to zero. - hw_num_processed = static_cast(hw_num_processed & m_descs.size_mask); - if (m_latency_meter != nullptr) { - // The latency meter gets an updated hw_num_processed via a call to vdma_interrupts_read_timestamps - // (the desc index of the last measured timestamp returned from that ioctl). Since update_latency_meter - // processed m_ongoing_transfers based on this hw_num_processed, and this function (i.e. - // trigger_channel_completion) also processes m_ongoing_transfers based on the value of hw_num_processed, - // we want the two to be the same. Hence, we'll use the more up to date num_processed returned by - // update_latency_meter. - // TODO: fix update_latency_meter flow (HRT-10284) - auto latency_meter_hw_num_processed = update_latency_meter(); - CHECK_EXPECTED_AS_STATUS(latency_meter_hw_num_processed); - hw_num_processed = latency_meter_hw_num_processed.value(); + CHECK_SUCCESS(update_latency_meter()); } while (!m_ongoing_transfers.empty()) { // Reading previous_num_processed inside the loop since on_transfer_complete may increase this value.
const auto previous_num_processed = static_cast(CB_TAIL(m_descs)); - if (!is_transfer_complete(m_ongoing_transfers.front(), previous_num_processed, hw_num_processed)) { break; } @@ -120,19 +103,7 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process auto transfer = std::move(m_ongoing_transfers.front()); m_ongoing_transfers.pop_front(); - hailo_status complete_status = HAILO_SUCCESS; - - #ifndef NDEBUG - assert(!transfer.last_descs.empty()); - auto &last_desc = (*m_desc_list)[transfer.last_descs.back()]; - if (!last_desc.is_done() || last_desc.is_error()) { - LOGGER__ERROR("Error while processing descriptor {} of DMA {} on device {} DESC_STATUS=0x{:x}.", - transfer.last_descs.back(), m_channel_id, m_driver.device_id(), last_desc.status()); - complete_status = HAILO_INTERNAL_FAILURE; - } - #endif - - on_transfer_complete(lock, transfer, complete_status); + on_transfer_complete(lock, transfer, HAILO_SUCCESS); } return HAILO_SUCCESS; @@ -141,7 +112,7 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process CONTROL_PROTOCOL__host_buffer_info_t BoundaryChannel::get_boundary_buffer_info(uint32_t transfer_size) const { // Boundary channels always have scatter gather buffers - return VdmaBuffer::get_host_buffer_info(VdmaBuffer::Type::SCATTER_GATHER, m_desc_list->dma_address(), + return VdmaEdgeLayer::get_host_buffer_info(VdmaEdgeLayer::Type::SCATTER_GATHER, m_desc_list->dma_address(), m_desc_list->desc_page_size(), m_desc_list->count(), transfer_size); } @@ -171,7 +142,7 @@ hailo_status BoundaryChannel::deactivate() return HAILO_SUCCESS; } -hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request, bool user_owns_buffer) +hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request) { std::unique_lock lock(m_channel_mutex); if (!m_is_channel_activated) { @@ -182,58 +153,86 @@ hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request return HAILO_QUEUE_IS_FULL; } - auto num_available = get_num_available(); + auto num_available = static_cast(CB_HEAD(m_descs)); const uint16_t first_desc = num_available; - std::vector transfer_last_descs; + uint16_t last_desc = std::numeric_limits::max(); uint16_t total_descs_count = 0; - for (size_t i = 0; i < transfer_request.transfer_buffers.size(); i++) { - auto mapped_buffer_exp = transfer_request.transfer_buffers[i].map_buffer(m_vdma_device, m_direction); - CHECK_EXPECTED_AS_STATUS(mapped_buffer_exp); - auto mapped_buffer = mapped_buffer_exp.release(); - - // Syncing the buffer to device change its ownership from host to the device. - // We sync on D2H as well if the user owns the buffer since the buffer might have been changed by - // the host between the time it was mapped and the current async transfer. If the buffer is not owned by the user, - // it won't be accessed for write. 
- if ((Direction::H2D == m_direction) || user_owns_buffer) { - auto status = transfer_request.transfer_buffers[i].synchronize(m_vdma_device, HailoRTDriver::DmaSyncDirection::TO_DEVICE); - CHECK_SUCCESS(status); - } - - const auto desired_desc_num = m_desc_list->descriptors_in_buffer(transfer_request.transfer_buffers[i].size()); - CHECK(desired_desc_num <= MAX_DESCS_COUNT, HAILO_INTERNAL_FAILURE); - const uint16_t desc_num = static_cast(desired_desc_num); - assert(total_descs_count + desc_num < MAX_DESCS_COUNT); - total_descs_count = static_cast(total_descs_count + desc_num); + const bool should_bind = !m_bounded_buffer; + if (!should_bind) { + CHECK_SUCCESS(validate_bound_buffer(transfer_request)); + } - const auto last_desc_avail = static_cast((num_available + desc_num - 1) & m_descs.size_mask); + std::vector driver_transfer_buffers; - transfer_last_descs.emplace_back(last_desc_avail); + auto current_num_available = num_available; + for (auto &transfer_buffer : transfer_request.transfer_buffers) { + TRY(auto mapped_buffer, transfer_buffer.map_buffer(m_driver, m_direction)); + driver_transfer_buffers.emplace_back(HailoRTDriver::TransferBuffer{ + mapped_buffer->handle(), + transfer_buffer.offset(), + transfer_buffer.size() + }); - // Raise interrupt on last buffer - const auto should_buffer_raise_int = (i == (transfer_request.transfer_buffers.size() - 1)); - auto status = prepare_descriptors(transfer_request.transfer_buffers[i].size(), num_available, mapped_buffer, - transfer_request.transfer_buffers[i].offset(), should_buffer_raise_int); - CHECK_SUCCESS(status); + const auto desired_desc_num = m_desc_list->descriptors_in_buffer(transfer_buffer.size()); + CHECK(desired_desc_num <= MAX_SG_DESCS_COUNT, HAILO_INTERNAL_FAILURE); + const uint16_t desc_num = static_cast(desired_desc_num); + assert(total_descs_count + desc_num < MAX_SG_DESCS_COUNT); + total_descs_count = static_cast(total_descs_count + desc_num); - num_available = static_cast((last_desc_avail + 1) & m_descs.size_mask); + last_desc = static_cast((current_num_available + desc_num - 1) & m_descs.size_mask); + current_num_available = static_cast((last_desc + 1) & m_descs.size_mask); } + auto first_desc_interrupts = InterruptsDomain::NONE; if ((nullptr != m_latency_meter) && (m_direction == Direction::H2D)) { // If we measure latency, we need an interrupt on the first descriptor for each H2D channel. - m_desc_list->program_single_descriptor((*m_desc_list)[first_desc], m_desc_list->desc_page_size(), - InterruptsDomain::HOST); + first_desc_interrupts = InterruptsDomain::HOST; } + const auto last_desc_interrupts = InterruptsDomain::HOST; - add_ongoing_transfer(std::move(transfer_request), first_desc, std::move(transfer_last_descs)); + int num_processed = CB_TAIL(m_descs); + int num_free = CB_AVAIL(m_descs, num_available, num_processed); + if (total_descs_count > num_free) { + return HAILO_OUT_OF_DESCRIPTORS; + } - auto status = inc_num_available(total_descs_count); - CHECK_SUCCESS(status); + m_ongoing_transfers.push_back(OngoingTransfer{std::move(transfer_request), last_desc}); + if (m_latency_meter) { + assert(!m_pending_latency_measurements.full()); + m_pending_latency_measurements.push_back(m_direction == Direction::H2D ? 
first_desc : last_desc); + } + CB_ENQUEUE(m_descs, total_descs_count); + + TRY(const auto desc_programmed, m_driver.launch_transfer( + m_channel_id, + m_desc_list->handle(), + num_available, + driver_transfer_buffers, + should_bind, + first_desc_interrupts, + last_desc_interrupts + )); + CHECK(total_descs_count == desc_programmed, HAILO_INTERNAL_FAILURE, + "Inconsistent descs programmed, expecting {} got {}", total_descs_count, desc_programmed); return HAILO_SUCCESS; } +hailo_status BoundaryChannel::bind_buffer(MappedBufferPtr buffer) +{ + CHECK(m_bounded_buffer == nullptr, HAILO_INTERNAL_FAILURE, + "Buffer is already bound to channel {}", m_channel_id); + const auto expected_size = static_cast(m_desc_list->desc_page_size()) * m_desc_list->count(); + CHECK(buffer->size() == expected_size, HAILO_INVALID_ARGUMENT, + "Buffer size {} does not fit in desc list - descs count {} desc page size {}", buffer->size(), + m_desc_list->count(), m_desc_list->desc_page_size()); + static const size_t DEFAULT_BUFFER_OFFSET = 0; + CHECK_SUCCESS(m_desc_list->configure_to_use_buffer(*buffer, buffer->size(), DEFAULT_BUFFER_OFFSET, m_channel_id)); + m_bounded_buffer = buffer; + return HAILO_SUCCESS; +} + void BoundaryChannel::cancel_pending_transfers() { std::unique_lock lock(m_channel_mutex); @@ -241,7 +240,7 @@ void BoundaryChannel::cancel_pending_transfers() auto transfer = std::move(m_ongoing_transfers.front()); m_ongoing_transfers.pop_front(); - on_transfer_complete(lock, transfer, HAILO_STREAM_ABORTED_BY_USER); + on_transfer_complete(lock, transfer, HAILO_STREAM_ABORT); } } @@ -255,16 +254,12 @@ size_t BoundaryChannel::get_max_ongoing_transfers(size_t transfer_size) const return std::min(max_transfers_in_buffer, m_ongoing_transfers.capacity()); } -Expected BoundaryChannel::update_latency_meter() +hailo_status BoundaryChannel::update_latency_meter() { - uint16_t last_num_processed = m_last_timestamp_num_processed; - - auto timestamp_list = m_driver.vdma_interrupts_read_timestamps(m_channel_id); - CHECK_EXPECTED(timestamp_list); - - if (0 == timestamp_list->count) { - // No new timestamps for this channel, return the previous result - return Expected(last_num_processed); + TRY(auto timestamp_list, m_driver.vdma_interrupts_read_timestamps(m_channel_id)); + if (0 == timestamp_list.count) { + // No new timestamps for this channel. + return HAILO_SUCCESS; } // TODO: now we have more iterations than we need. We know that the pending buffers + the timestamp list
// not in those timestamps - for (const auto &transfer : m_ongoing_transfers) { - uint16_t latency_desc = static_cast(transfer.latency_measure_desc); - for (size_t i = 0; i < timestamp_list->count; i++) { - const auto &irq_timestamp = timestamp_list->timestamp_list[i]; - const auto desc_num_processed = static_cast(irq_timestamp.desc_num_processed & m_descs.size_mask); - if (is_desc_between(last_num_processed, desc_num_processed, latency_desc)) { - if (m_direction == Direction::H2D) { - m_latency_meter->add_start_sample(irq_timestamp.timestamp); - } - else { - m_latency_meter->add_end_sample(m_stream_name, irq_timestamp.timestamp); - } - break; + auto find_timestamp = [&](uint16_t latency_desc) -> Expected { + for (size_t i = 0; i < timestamp_list.count; i++) { + const auto &irq_timestamp = timestamp_list.timestamp_list[i]; + if (is_desc_between(m_last_timestamp_num_processed, irq_timestamp.desc_num_processed, latency_desc)) { + return std::chrono::nanoseconds{irq_timestamp.timestamp}; } } + return make_unexpected(HAILO_NOT_FOUND); + }; + + while (!m_pending_latency_measurements.empty()) { + auto timestamp = find_timestamp(m_pending_latency_measurements.front()); + if (!timestamp) { + break; + } + + if (m_direction == Direction::H2D) { + m_latency_meter->add_start_sample(*timestamp); + } else { + m_latency_meter->add_end_sample(m_stream_name, *timestamp); + } + m_pending_latency_measurements.pop_front(); } - m_last_timestamp_num_processed = static_cast( - timestamp_list->timestamp_list[timestamp_list->count-1].desc_num_processed & m_descs.size_mask); - return Expected(m_last_timestamp_num_processed); + m_last_timestamp_num_processed = timestamp_list.timestamp_list[timestamp_list.count-1].desc_num_processed; + return HAILO_SUCCESS; } bool BoundaryChannel::is_transfer_complete(const OngoingTransfer &transfer, uint16_t previous_num_processed, uint16_t current_num_processed) const { // Transfer is complete if its last descriptor is in [previous_num_processed, current_num_processed) or - // the the buffer is empty (previous_num_processed == get_num_available()) - assert(!transfer.last_descs.empty()); - return is_desc_between(previous_num_processed, current_num_processed, transfer.last_descs.back()) || - (current_num_processed == get_num_available()); + // the the buffer is empty (previous_num_processed == CB_HEAD(m_descs)) + return is_desc_between(previous_num_processed, current_num_processed, transfer.last_desc) || + (current_num_processed == CB_HEAD(m_descs)); } void BoundaryChannel::on_transfer_complete(std::unique_lock &lock, OngoingTransfer &transfer, hailo_status complete_status) { - // Clear relevant descriptors from previous transfer - if (nullptr != m_latency_meter) { - m_desc_list->clear_descriptor(transfer.latency_measure_desc); - } - - assert(!transfer.last_descs.empty()); - for (const auto& last_desc : transfer.last_descs) { - m_desc_list->clear_descriptor(last_desc); - } - // We increase desc num_proc (can happen only in this flow). After it is increased - // 1. On D2H channels - the output can be read by the user. // 2. On H2D channels - new input can be written to the buffer. - _CB_SET(m_descs.tail, (transfer.last_descs.back() + 1) & m_descs.size_mask); + _CB_SET(m_descs.tail, (transfer.last_desc + 1) & m_descs.size_mask); // Finally, we notify user callbacks registered with the transfer. // We want to make sure that the callbacks are called after the descriptors can be reused (So the user will // be able to start new transfer). 
lock.unlock(); - - if (Direction::D2H == m_direction) { - for (auto& transfer_buffer : transfer.request.transfer_buffers) { - auto sync_status = transfer_buffer.synchronize(m_vdma_device, HailoRTDriver::DmaSyncDirection::TO_HOST); - if (HAILO_SUCCESS != sync_status) { - LOGGER__ERROR("Failed to sync buffer for output channel {} device {}", m_channel_id, m_driver.device_id()); - if (HAILO_SUCCESS != complete_status) { - complete_status = sync_status; - } - } - } - } - transfer.request.callback(complete_status); lock.lock(); } -hailo_status BoundaryChannel::prepare_descriptors(size_t transfer_size, uint16_t starting_desc, - MappedBufferPtr mapped_buffer, size_t buffer_offset, bool raise_interrupt) -{ - if (mapped_buffer != nullptr) { - CHECK((buffer_offset % m_desc_list->desc_page_size()) == 0, HAILO_INTERNAL_FAILURE, - "Buffer offset {} must be desc page size aligned {}", buffer_offset, m_desc_list->desc_page_size()); - const size_t buffer_offset_in_descs = buffer_offset / m_desc_list->desc_page_size(); - if (!is_buffer_already_configured(mapped_buffer, buffer_offset_in_descs, starting_desc)) { - // We need to configure the buffer now. - - // First, store information on the buffer. - m_last_bounded_buffer.buffer = mapped_buffer; - m_last_bounded_buffer.starting_desc = starting_desc; - m_last_bounded_buffer.buffer_offset_in_descs = static_cast(buffer_offset_in_descs); - - // Now we want that m_desc_list[starting_desc] will be mapped into mapped_buffer[buffer_offset]. - // The descriptors list configure always starts from buffer_offset=0, so in order to achieve our - // configuration, we configure the buffer starting from desc=(starting_desc - buffer_offset_in_desc). - // Then, after configuring buffer_offset bytes from the buffer, the desc_index will be starting desc. - const int desc_diff = static_cast(starting_desc) - static_cast(buffer_offset_in_descs); - const auto configure_starting_desc = static_cast(m_descs.size + desc_diff) % m_descs.size; - - // Finally do the actual configuration. - auto status = m_desc_list->configure_to_use_buffer(*mapped_buffer, m_channel_id, configure_starting_desc); - CHECK_SUCCESS(status); - } - } - - auto last_desc_interrupts_domain = raise_interrupt ? InterruptsDomain::HOST : InterruptsDomain::NONE; - // TODO: HRT-11188 - fix starting_desc parameter - auto actual_desc_count = m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, - starting_desc); - CHECK_EXPECTED_AS_STATUS(actual_desc_count, "Failed to program desc_list for channel {}", m_channel_id); - - return HAILO_SUCCESS; -} - -bool BoundaryChannel::is_buffer_already_configured(MappedBufferPtr buffer, size_t buffer_offset_in_descs, - size_t starting_desc) const -{ - if (m_last_bounded_buffer.buffer != buffer) { - // Last buffer is nullptr or not the same as the given. - return false; - } - - // If the diff between starting_desc and m_last_bounded_buffer.starting_desc and the diff between - // buffer_offset_in_descs - m_last_bounded_buffer.buffer_offset_in_descs are equal, it means that the buffer is - // already configured. - // Note that we don't afraid of overflow since buffer_offset_in_descs * desc_page_size() must fit inside the buffer. 
- const auto starting_desc_diff = (starting_desc - m_last_bounded_buffer.starting_desc) % m_descs.size; - const auto buffer_offset_diff_in_descs = (buffer_offset_in_descs - m_last_bounded_buffer.buffer_offset_in_descs) % m_descs.size; - return starting_desc_diff == buffer_offset_diff_in_descs; -} - -void BoundaryChannel::add_ongoing_transfer(TransferRequest &&transfer_request, uint16_t first_desc, - std::vector &&last_descs) -{ - OngoingTransfer transfer{}; - transfer.request = std::move(transfer_request); - transfer.last_descs = std::move(last_descs); - transfer.latency_measure_desc = (m_direction == HailoRTDriver::DmaDirection::H2D) ? first_desc : - transfer.last_descs.back(); - m_ongoing_transfers.push_back(std::move(transfer)); -} - -hailo_status BoundaryChannel::inc_num_available(uint16_t value) -{ - int num_available = get_num_available(); - int num_processed = CB_TAIL(m_descs); - int num_free = CB_AVAIL(m_descs, num_available, num_processed); - if (value > num_free) { - return HAILO_OUT_OF_DESCRIPTORS; - } - - CB_ENQUEUE(m_descs, value); - num_available = (num_available + value) & m_descs.size_mask; - - return m_host_registers.set_num_available(static_cast(num_available)); -} - bool BoundaryChannel::is_desc_between(uint16_t begin, uint16_t end, uint16_t desc) { if (begin == end) { @@ -439,26 +336,6 @@ bool BoundaryChannel::is_desc_between(uint16_t begin, uint16_t end, uint16_t des } } -uint16_t BoundaryChannel::get_num_available() const -{ - uint16_t num_available = (uint16_t)CB_HEAD(m_descs); - -#ifndef NDEBUG - // Validate synchronization with HW - auto hw_num_avail = m_host_registers.get_num_available(); - assert(hw_num_avail); - - // On case of channel aborted, the num_available is set to 0 (so we don't accept sync) - auto is_aborted_exp = m_host_registers.is_aborted(); - assert(is_aborted_exp); - - if (!is_aborted_exp.value()) { - assert(hw_num_avail.value() == num_available); - } -#endif - return num_available; -} - hailo_status BoundaryChannel::allocate_descriptor_list(uint32_t descs_count, uint16_t desc_page_size) { static const bool CIRCULAR = true; @@ -471,5 +348,23 @@ hailo_status BoundaryChannel::allocate_descriptor_list(uint32_t descs_count, uin return HAILO_SUCCESS; } +hailo_status BoundaryChannel::validate_bound_buffer(TransferRequest &transfer_request) +{ + assert(m_bounded_buffer); + CHECK(transfer_request.transfer_buffers.size() == 1, HAILO_INTERNAL_FAILURE, + "When bound buffer is used, transfer request must contain only one buffer"); + + auto &transfer_buffer = transfer_request.transfer_buffers[0]; + const auto num_available = CB_HEAD(m_descs); + const auto expected_offset = static_cast(m_desc_list->desc_page_size()) * num_available; + CHECK(transfer_buffer.offset() == expected_offset, HAILO_INTERNAL_FAILURE, + "Unexpected buffer offset, expected {} actual {}", expected_offset, transfer_buffer.offset()); + CHECK(transfer_buffer.base_buffer().data() == reinterpret_cast(m_bounded_buffer->user_address()), HAILO_INTERNAL_FAILURE, + "Got the wrong buffer"); + CHECK(transfer_buffer.base_buffer().size() == m_bounded_buffer->size(), HAILO_INTERNAL_FAILURE, + "Got invalid buffer size {}, expected {}", transfer_buffer.base_buffer().size(), m_bounded_buffer->size()); + return HAILO_SUCCESS; +} + } /* namespace vdma */ } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp index 38b7d026..8b136138 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp +++ 
b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp @@ -10,8 +10,6 @@ #ifndef _HAILO_VDMA_BOUNDARY_CHANNEL_HPP_ #define _HAILO_VDMA_BOUNDARY_CHANNEL_HPP_ -#include "vdma/vdma_device.hpp" -#include "vdma/channel/vdma_channel_regs.hpp" #include "vdma/channel/channel_id.hpp" #include "vdma/memory/descriptor_list.hpp" #include "stream_common/transfer_common.hpp" @@ -28,8 +26,7 @@ namespace vdma { struct OngoingTransfer { TransferRequest request; - std::vector last_descs; - uint16_t latency_measure_desc; + uint16_t last_desc; }; class BoundaryChannel; @@ -39,10 +36,10 @@ class BoundaryChannel final public: using Direction = HailoRTDriver::DmaDirection; - static Expected create(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, + static Expected create(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name = "", LatencyMeterPtr latency_meter = nullptr); - BoundaryChannel(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, uint32_t descs_count, + BoundaryChannel(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter, hailo_status &status); BoundaryChannel(const BoundaryChannel &other) = delete; @@ -60,13 +57,16 @@ class BoundaryChannel final hailo_status trigger_channel_completion(uint16_t hw_num_processed); // Calls all pending transfer callbacks (if they exist), marking them as canceled by passing - // HAILO_STREAM_ABORTED_BY_USER as a status to the callbacks. + // HAILO_STREAM_ABORT as a status to the callbacks. // Note: This function is to be called on a deactivated channel object. Calling on an active channel will lead to // unexpected results void cancel_pending_transfers(); - // user_owns_buffer is set when the buffer is owned by the user (otherwise we may have some assumtions). - hailo_status launch_transfer(TransferRequest &&transfer_request, bool user_owns_buffer); + hailo_status launch_transfer(TransferRequest &&transfer_request); + + // To avoid buffer bindings, one can call this function to statically bind a full buffer to the channel. The buffer + // size should be exactly desc_page_size() * descs_count() of current descriptors list. 
+ hailo_status bind_buffer(MappedBufferPtr buffer); size_t get_max_ongoing_transfers(size_t transfer_size) const; @@ -88,63 +88,49 @@ class BoundaryChannel final } private: - static void empty_transfer_done_callback(hailo_status){} - // Returns the desc index of the last desc whose timestamp was measured in the driver - Expected update_latency_meter(); + hailo_status update_latency_meter(); bool is_transfer_complete(const OngoingTransfer &transfer, uint16_t previous_num_processed, uint16_t current_num_processed) const; void on_transfer_complete(std::unique_lock &lock, OngoingTransfer &transfer, hailo_status complete_status); - hailo_status prepare_descriptors(size_t transfer_size, uint16_t starting_desc, - MappedBufferPtr mapped_buffer, size_t buffer_offset, bool raise_interrupt = true); - - bool is_buffer_already_configured(MappedBufferPtr buffer, size_t buffer_offset_in_descs, size_t starting_desc) const; - void add_ongoing_transfer(TransferRequest &&transfer_request, uint16_t first_desc, - std::vector &&last_descs); static bool is_desc_between(uint16_t begin, uint16_t end, uint16_t desc); - uint16_t get_num_available() const; - hailo_status inc_num_available(uint16_t value); hailo_status allocate_descriptor_list(uint32_t descs_count, uint16_t desc_page_size); + hailo_status validate_bound_buffer(TransferRequest &transfer_request); const vdma::ChannelId m_channel_id; const Direction m_direction; - VdmaDevice &m_vdma_device; HailoRTDriver &m_driver; - VdmaChannelRegs m_host_registers; std::shared_ptr m_desc_list; // Host side descriptor list const std::string m_stream_name; circbuf_t m_descs; - LatencyMeterPtr m_latency_meter; bool m_is_channel_activated; std::mutex m_channel_mutex; CircularArray m_ongoing_transfers; - // Contains the last num_processed of the last interrupt (only used on latency measurement) + // About HW latency measurements: + // - For each ongoing transfer, we push some num-proc value to the pending_latency_measurements array. When this + // descriptor is processed, we can add a sample to the latency meter. + // - On H2D, the descriptor is the first descriptor on each transfer, so we start the measurement after the first + // vdma descriptor is processed. We don't measure on launch_transfer since the hw may be busy processing + // requests. When the first descriptor is processed, we can be sure the hw has really started processing the + // frame. + // - On D2H, the descriptor is the last descriptor on each transfer, so we end the measurement after the transfer is + // processed. + // - To get the timestamp, the read_timestamps ioctl is called. This ioctl returns pairs of num-processed and + // their interrupt timestamps; then, using m_last_timestamp_num_processed, we can check if some + // pending_latency_measurement is done. + // - We don't use m_ongoing_transfers to store the latency measurements because to finish an ongoing transfer + // we use the hw num processed given by trigger_channel_completion, which may be different than the hw num processed + // returned from read_timestamps_ioctl (one is measured in the ioctl and the other is measured in the interrupt). + LatencyMeterPtr m_latency_meter; + CircularArray m_pending_latency_measurements; uint16_t m_last_timestamp_num_processed; - struct BoundedBuffer { - MappedBufferPtr buffer; - - // The buffer is bounded starting from this descriptor. - uint16_t starting_desc; - - // Offset inside the buffer (in desc_page_size granularity) of the "actual start" of the buffer.
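Concretely, the pairing described in the comment above can be modeled as: keep a FIFO of one marker descriptor per transfer, and whenever the driver returns new (num-processed, timestamp) pairs, pop every marker covered by them, oldest first. An illustrative model, not the library code (the real flow is update_latency_meter in the .cpp above):

#include <chrono>
#include <cstdint>
#include <deque>
#include <utility>
#include <vector>

struct IrqTimestamp {
    uint16_t desc_num_processed;
    std::chrono::nanoseconds timestamp;
};

// Circular half-open membership, as in is_desc_between().
static bool covered(uint16_t begin, uint16_t end, uint16_t desc)
{
    if (begin == end) return false;
    return (begin < end) ? (begin <= desc && desc < end) : (desc >= begin || desc < end);
}

// Pops every pending marker covered by the new timestamps, oldest first, and
// returns the matched (marker, timestamp) samples in order.
static std::vector<std::pair<uint16_t, std::chrono::nanoseconds>> match_markers(
    uint16_t &last_num_processed, std::deque<uint16_t> &pending_markers,
    const std::vector<IrqTimestamp> &timestamps)
{
    std::vector<std::pair<uint16_t, std::chrono::nanoseconds>> samples;
    while (!pending_markers.empty()) {
        bool matched = false;
        for (const auto &irq : timestamps) {
            if (covered(last_num_processed, irq.desc_num_processed, pending_markers.front())) {
                samples.emplace_back(pending_markers.front(), irq.timestamp);
                pending_markers.pop_front();
                matched = true;
                break;
            }
        }
        if (!matched) break;  // the oldest marker isn't reached yet, so newer ones can't be either
    }
    if (!timestamps.empty()) {
        last_num_processed = timestamps.back().desc_num_processed;
    }
    return samples;
}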
-    struct BoundedBuffer {
-        MappedBufferPtr buffer;
-
-        // The buffer is bounded starting from this descriptor.
-        uint16_t starting_desc;
-
-        // Offset inside the buffer (in desc_page_size granularity) of the "actual start" of the buffer.
-        // It implies that:
-        //      desc_list[starting_desc] will point to buffer[buffers_desc_offset * desc_page_size].
-        uint16_t buffer_offset_in_descs;
-    };
-
-    // We store the last bounded buffer as cache in order to avoid unnecessary descriptors list reprogramming.
-    // It is good enough to store only the last bounded buffer because we have two modes of execution:
-    //   1. User allocated buffers - On each transfer we bind new buffer. Even if the user always uses the same
-    //      buffers, due to the circular nature of descriptor list, reprogramming will almost always be needed (So
-    //      cacheing won't help).
-    //   2. Single circular buffer (internally) - In this case we don't need to bind each time (maybe after the
-    //      channel is re-activated). Caching the last bounded buffer is enough.
-    BoundedBuffer m_last_bounded_buffer;
+    // When bind_buffer is called, we keep a reference to the buffer here, to avoid re-binding it on every transfer.
+    std::shared_ptr<MappedBuffer> m_bounded_buffer;
 };
 
 } /* namespace vdma */
 
diff --git a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp
index b039e41e..50b0c49d 100644
--- a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp
+++ b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp
@@ -10,7 +10,7 @@
 #ifndef _HAILO_VDMA_INTERRUPTS_DISPATCHER_HPP_
 #define _HAILO_VDMA_INTERRUPTS_DISPATCHER_HPP_
 
-#include "os/hailort_driver.hpp"
+#include "vdma/driver/hailort_driver.hpp"
 #include
 #include
 #include
diff --git a/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp b/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp
new file mode 100644
index 00000000..96c5c93e
--- /dev/null
+++ b/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp
@@ -0,0 +1,128 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file transfer_launcher.cpp
+ * @brief Manages a thread that launches non-bound async vdma read/writes
+ **/
+
+#include "transfer_launcher.hpp"
+#include "common/utils.hpp"
+#include "common/os_utils.hpp"
+
+namespace hailort {
+namespace vdma {
+
+Expected<std::unique_ptr<TransferLauncher>> TransferLauncher::create()
+{
+    auto thread = make_unique_nothrow<TransferLauncher>();
+    CHECK_NOT_NULL_AS_EXPECTED(thread, HAILO_OUT_OF_HOST_MEMORY);
+    return thread;
+}
+
+TransferLauncher::TransferLauncher() :
+    m_mutex(),
+    m_cond(),
+    m_queue(),
+    m_should_quit(false),
+    m_thread_active(false),
+    m_worker_thread([this] { worker_thread(); })
+{}
+
+TransferLauncher::~TransferLauncher()
+{
+    const auto status = stop();
+    if (status != HAILO_SUCCESS) {
+        LOGGER__ERROR("Failed stopping transfer launcher thread in destructor");
+    }
+
+    if (m_worker_thread.joinable()) {
+        signal_thread_quit();
+        m_worker_thread.join();
+    }
+}
+
+hailo_status TransferLauncher::enqueue_transfer(Transfer &&transfer)
+{
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        m_queue.emplace(std::move(transfer));
+    }
+
+    m_cond.notify_one();
+    return HAILO_SUCCESS;
+}
+
+hailo_status TransferLauncher::start()
+{
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        CHECK(!m_thread_active, HAILO_INVALID_OPERATION, "Transfer launcher thread already running");
+
+        m_thread_active = true;
+    }
+    m_cond.notify_one();
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status TransferLauncher::stop()
+{
+    std::unique_lock<std::mutex> lock(m_mutex);
+
+    if (!m_thread_active) {
+        // Already stopped
+        return HAILO_SUCCESS;
+    }
+
+    m_thread_active = false;
+
+    while (!m_queue.empty()) {
+        m_queue.pop();
+        // TODO: need to call the callbacks to signal that they were aborted? (HRT-13110)
+        // like this:
+        //     auto transfer_request = m_queue.front();
+        //     m_queue.pop();
+        //     transfer_request.callback(HAILO_STREAM_ABORT);
+        // or can it be done in BoundaryChannel::cancel_pending_transfers?
+    }
+
+    // TODO: Keep stop flow used in interrupt thread? (HRT-13110)
+    // E.g. look for comment "The wait is needed because otherwise, on a fast stop()..."
+
+    return HAILO_SUCCESS;
+}
+
+void TransferLauncher::worker_thread()
+{
+    OsUtils::set_current_thread_name("TRANSFR_LNCH");
+
+    while (true) {
+        Transfer transfer;
+        {
+            std::unique_lock<std::mutex> lock(m_mutex);
+            m_cond.wait(lock, [this] { return m_should_quit || (!m_queue.empty() && m_thread_active); });
+            if (m_should_quit) {
+                return;
+            }
+
+            // There's work to do
+            transfer = std::move(m_queue.front());
+            m_queue.pop();
+        }
+        transfer();
+    }
+}
+
+void TransferLauncher::signal_thread_quit()
+{
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        m_should_quit = true;
+    }
+    m_cond.notify_all();
+}
+
+} /* namespace vdma */
+} /* namespace hailort */
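The launcher above is deliberately small: callers hand it opaque callables and a single worker thread drains them in order. A short usage sketch (the lambda body is illustrative only):

```cpp
// Sketch: typical lifecycle of a TransferLauncher.
auto launcher = TransferLauncher::create().release();
auto status = launcher->start();
assert(HAILO_SUCCESS == status);

// Each transfer is an opaque callable; in practice it would bind a buffer and
// launch a vdma transfer off the caller's thread.
status = launcher->enqueue_transfer([]() {
    // launch_transfer(...) would run here.
});
assert(HAILO_SUCCESS == status);

// stop() drains the queue without executing the remaining transfers (see the TODO in
// stop()); the worker thread itself is only joined in the destructor.
status = launcher->stop();
assert(HAILO_SUCCESS == status);
```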
diff --git a/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp b/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp
new file mode 100644
index 00000000..87136848
--- /dev/null
+++ b/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp
@@ -0,0 +1,62 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file transfer_launcher.hpp
+ * @brief Manages a thread that launches non-bound async vdma read/writes
+ **/
+
+#ifndef _HAILO_TRANSFER_LAUNCHER_HPP_
+#define _HAILO_TRANSFER_LAUNCHER_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+
+namespace hailort {
+namespace vdma {
+
+class TransferLauncher final
+{
+public:
+    // TODO: fix this to be a proper transfer object (HRT-13110)
+    using Transfer = std::function<void()>;
+
+    static Expected<std::unique_ptr<TransferLauncher>> create();
+    TransferLauncher();
+    ~TransferLauncher();
+
+    TransferLauncher(TransferLauncher &&) = delete;
+    TransferLauncher(const TransferLauncher &) = delete;
+    TransferLauncher &operator=(TransferLauncher &&) = delete;
+    TransferLauncher &operator=(const TransferLauncher &) = delete;
+
+    hailo_status enqueue_transfer(Transfer &&transfer);
+    hailo_status start();
+    hailo_status stop();
+
+private:
+    void worker_thread();
+    void signal_thread_quit();
+
+    std::mutex m_mutex;
+    std::condition_variable m_cond;
+    // TODO: use SpscQueue (HRT-10554)
+    std::queue<Transfer> m_queue;
+    // m_should_quit is used to quit the thread (called on destruction)
+    bool m_should_quit;
+    bool m_thread_active;
+    std::thread m_worker_thread;
+};
+
+} /* namespace vdma */
+} /* namespace hailort */
+
+#endif /* _HAILO_TRANSFER_LAUNCHER_HPP_ */
diff --git a/hailort/libhailort/src/vdma/channel/vdma_channel_regs.hpp b/hailort/libhailort/src/vdma/channel/vdma_channel_regs.hpp
deleted file mode 100644
index 22990484..00000000
--- a/hailort/libhailort/src/vdma/channel/vdma_channel_regs.hpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file vdma_channel_regs.hpp - * @brief utilties used to parse/modify PLDA Vdma channel registers - **/ - -#ifndef _HAILO_VDMA_CHANNEL__REGS_HPP_ -#define _HAILO_VDMA_CHANNEL__REGS_HPP_ - -#include "hw_consts.hpp" -#include "hailo/expected.hpp" -#include "os/hailort_driver.hpp" - -#include - -namespace hailort -{ - -#define DESCPRIPTOR_LIST_MAX_DEPTH (16) - - -inline bool vdma_channel_control_is_aborted(uint8_t control_reg) -{ - return (control_reg & 1) == 0; -} - - -class VdmaChannelRegs final { -public: - VdmaChannelRegs(HailoRTDriver &driver, vdma::ChannelId channel_id, HailoRTDriver::DmaDirection direction) : - m_driver(driver), - m_channel_id(channel_id), - m_direction(direction) - {} - - Expected get_num_available() const - { - return read_integer(VDMA_CHANNEL_NUM_AVAIL_OFFSET); - } - - hailo_status set_num_available(uint16_t value) - { - return write_integer(VDMA_CHANNEL_NUM_AVAIL_OFFSET, value); - } - -#ifndef NDEBUG - Expected is_aborted() const - { - const auto control_reg = read_integer(VDMA_CHANNEL_CONTROL_OFFSET); - CHECK_EXPECTED(control_reg); - return vdma_channel_control_is_aborted(*control_reg); - } -#endif /* NDEBUG */ - -private: - - template - Expected read_integer(uint32_t offset) const - { - auto value = m_driver.read_vdma_channel_register(m_channel_id, m_direction, offset, sizeof(IntegerType)); - CHECK_EXPECTED(value); - return static_cast(value.release()); - } - - template - hailo_status write_integer(uint32_t offset, IntegerType value) - { - return m_driver.write_vdma_channel_register(m_channel_id, m_direction, offset, sizeof(value), value); - } - - HailoRTDriver &m_driver; - const vdma::ChannelId m_channel_id; - const HailoRTDriver::DmaDirection m_direction; -}; - -} /* namespace hailort */ - -#endif /*_HAILO_VDMA_CHANNEL__REGS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp index f6d8e4b1..6be62879 100644 --- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp +++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp @@ -8,6 +8,7 @@ #include "circular_stream_buffer_pool.hpp" #include "vdma/memory/descriptor_list.hpp" +#include "utils/buffer_storage.hpp" #include "utils.h" @@ -15,36 +16,37 @@ namespace hailort { Expected> CircularStreamBufferPool::create(VdmaDevice &device, - HailoRTDriver::DmaDirection direction, size_t desc_page_size, size_t descs_count, size_t transfer_size) + hailo_dma_buffer_direction_t direction, size_t desc_page_size, size_t descs_count, size_t transfer_size) { // TODO: HRT-11220 calculate desc_count/desc_page_size base on transfer_size and queue_size - CHECK_AS_EXPECTED(is_powerof2(descs_count), HAILO_INTERNAL_FAILURE, "descs_count {} must be power of 2", descs_count); - CHECK_AS_EXPECTED(is_powerof2(desc_page_size), HAILO_INTERNAL_FAILURE, "desc_page_size {} must be power of 2", + CHECK(is_powerof2(descs_count), HAILO_INTERNAL_FAILURE, "descs_count {} must be power of 2", descs_count); + CHECK(is_powerof2(desc_page_size), HAILO_INTERNAL_FAILURE, "desc_page_size {} must be power of 2", desc_page_size); const auto buffer_size = desc_page_size * descs_count; - CHECK_AS_EXPECTED(transfer_size < buffer_size, HAILO_INTERNAL_FAILURE, "Transfer size {} must be smaller than buffer size {}", + CHECK(transfer_size < buffer_size, HAILO_INTERNAL_FAILURE, "Transfer size {} must be smaller than buffer size {}", transfer_size, 
buffer_size); - auto mapped_buffer = allocate_buffer(device, direction, buffer_size); - CHECK_EXPECTED(mapped_buffer); + TRY(auto base_buffer, allocate_buffer(device, buffer_size)); + TRY(auto mapping, DmaMappedBuffer::create(device, base_buffer.data(), base_buffer.size(), direction)); auto circular_buffer_pool = make_unique_nothrow(desc_page_size, descs_count, - transfer_size, mapped_buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(circular_buffer_pool, HAILO_OUT_OF_HOST_MEMORY); + transfer_size, std::move(base_buffer), std::move(mapping)); + CHECK_NOT_NULL(circular_buffer_pool, HAILO_OUT_OF_HOST_MEMORY); return circular_buffer_pool; } CircularStreamBufferPool::CircularStreamBufferPool(size_t desc_page_size, size_t descs_count, size_t transfer_size, - BufferPtr &&mapped_buffer) : + Buffer &&base_buffer, DmaMappedBuffer &&mappings) : m_desc_page_size(desc_page_size), m_transfer_size(transfer_size), - m_mapped_buffer(std::move(mapped_buffer)), + m_base_buffer(std::move(base_buffer)), + m_mappings(std::move(mappings)), m_next_enqueue_desc_offset(0) { assert(is_powerof2(descs_count) && (descs_count > 0)); - assert(m_mapped_buffer->size() == (m_desc_page_size * descs_count)); + assert(m_base_buffer.size() == (m_desc_page_size * descs_count)); CB_INIT(m_queue, descs_count); m_queue.head = static_cast(descs_count - 1); } @@ -67,7 +69,7 @@ Expected CircularStreamBufferPool::dequeue() const size_t offset_in_buffer = CB_TAIL(m_queue) * m_desc_page_size; CB_DEQUEUE(m_queue, descs_in_transfer()); return TransferBuffer { - m_mapped_buffer, + MemoryView(m_base_buffer), m_transfer_size, offset_in_buffer }; @@ -78,7 +80,7 @@ hailo_status CircularStreamBufferPool::enqueue(TransferBuffer &&buffer_info) const size_t descs_required = descs_in_transfer(); const size_t descs_available = CB_AVAIL(m_queue, CB_HEAD(m_queue), CB_TAIL(m_queue)); CHECK(descs_available >= descs_required, HAILO_INTERNAL_FAILURE, "Can enqueue without previous dequeue"); - CHECK(buffer_info.base_buffer() == m_mapped_buffer, HAILO_INTERNAL_FAILURE, "Got the wrong buffer"); + CHECK(buffer_info.base_buffer().data() == m_base_buffer.data(), HAILO_INTERNAL_FAILURE, "Got the wrong buffer"); CHECK(buffer_info.size() == m_transfer_size, HAILO_INTERNAL_FAILURE, "Got invalid buffer size {}, expected {}", buffer_info.size(), m_transfer_size); @@ -99,24 +101,14 @@ void CircularStreamBufferPool::reset_pointers() m_next_enqueue_desc_offset = 0; } -Expected CircularStreamBufferPool::allocate_buffer(VdmaDevice &device, - HailoRTDriver::DmaDirection direction, size_t size) +Expected CircularStreamBufferPool::allocate_buffer(VdmaDevice &device, size_t size) { - auto dma_able_buffer = vdma::DmaAbleBuffer::create_by_allocation(size, device.get_driver()); - CHECK_EXPECTED(dma_able_buffer); + TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_by_allocation(size, device.get_driver())); - auto dma_storage = make_shared_nothrow(dma_able_buffer.release()); + auto dma_storage = make_shared_nothrow(std::move(dma_able_buffer)); CHECK_NOT_NULL_AS_EXPECTED(dma_storage, HAILO_OUT_OF_HOST_MEMORY); - // TODO HRT-11595: We map the buffer here to avoid mapping buffer during descriptors list creation (it cause - // deadlock on the linux driver). After HRT-11595, we won't need to call dma_map. 
- auto map_result = dma_storage->dma_map(device, to_hailo_dma_direction(direction)); - CHECK_EXPECTED(map_result); - - auto mapped_buffer = make_shared_nothrow(std::move(dma_storage)); - CHECK_NOT_NULL_AS_EXPECTED(mapped_buffer, HAILO_OUT_OF_HOST_MEMORY); - - return mapped_buffer; + return Buffer::create(dma_storage); } size_t CircularStreamBufferPool::descs_in_transfer() const diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp index 04fd9028..4fd87653 100644 --- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp +++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp @@ -14,6 +14,7 @@ #include "common/circular_buffer.hpp" #include "stream_common/stream_buffer_pool.hpp" #include "vdma/vdma_device.hpp" +#include "hailo/dma_mapped_buffer.hpp" #include @@ -29,10 +30,10 @@ namespace hailort class CircularStreamBufferPool final : public StreamBufferPool { public: static Expected> create(VdmaDevice &device, - HailoRTDriver::DmaDirection direction, size_t desc_page_size, size_t descs_count, size_t transfer_size); + hailo_dma_buffer_direction_t direction, size_t desc_page_size, size_t descs_count, size_t transfer_size); CircularStreamBufferPool(size_t desc_page_size, size_t descs_count, size_t transfer_size, - BufferPtr &&mapped_buffer); + Buffer &&base_buffer, DmaMappedBuffer &&mappings); virtual size_t max_queue_size() const override; size_t buffers_ready_to_dequeue() const; @@ -41,13 +42,12 @@ class CircularStreamBufferPool final : public StreamBufferPool { virtual hailo_status enqueue(TransferBuffer &&buffer_info) override; - BufferPtr get_mapped_buffer() { return m_mapped_buffer; } + Buffer &get_base_buffer() { return m_base_buffer; } virtual void reset_pointers() override; private: - static Expected allocate_buffer(VdmaDevice &device, - HailoRTDriver::DmaDirection direction, size_t size); + static Expected allocate_buffer(VdmaDevice &device, size_t size); size_t descs_in_transfer() const; @@ -57,7 +57,8 @@ class CircularStreamBufferPool final : public StreamBufferPool { const size_t m_transfer_size; // m_mapped_buffer.size() must be CB_SIZE(m_queue) * m_desc_page_size - BufferPtr m_mapped_buffer; + Buffer m_base_buffer; + DmaMappedBuffer m_mappings; // Head/tail based queue that manages the buffer pool. // The head and tail are in m_desc_page_size granularity. diff --git a/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp b/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp new file mode 100644 index 00000000..b770b921 --- /dev/null +++ b/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file dma_mapped_buffer.cpp
+ **/
+
+#include "hailo/dma_mapped_buffer.hpp"
+#include "hailo/hailort.h"
+#include "hailo/vdevice.hpp"
+
+#include "common/logger_macros.hpp"
+#include "common/utils.hpp"
+
+namespace hailort
+{
+
+class DmaMappedBuffer::Impl final {
+public:
+    Impl(VDevice &vdevice, void *address, size_t size, hailo_dma_buffer_direction_t direction, hailo_status &status)
+    {
+        create_mapping(vdevice, address, size, direction, status);
+    }
+
+    Impl(Device &device, void *address, size_t size, hailo_dma_buffer_direction_t direction, hailo_status &status)
+    {
+        create_mapping(device, address, size, direction, status);
+    }
+
+    ~Impl()
+    {
+        if (m_unmap) {
+            m_unmap();
+        }
+    }
+
+    Impl(const Impl&) = delete;
+    Impl& operator=(const Impl&) = delete;
+
+private:
+
+    template <typename DeviceType>
+    void create_mapping(DeviceType &device, void *address, size_t size, hailo_dma_buffer_direction_t direction, hailo_status &status) {
+        status = device.dma_map(address, size, direction);
+        if (HAILO_SUCCESS != status) {
+            LOGGER__ERROR("Failed to map dma buffer, status: {}", status);
+            return;
+        }
+
+        m_unmap = [&device, address, size, direction]() {
+            auto status = device.dma_unmap(address, size, direction);
+            if (HAILO_SUCCESS != status) {
+                LOGGER__ERROR("Failed to unmap dma buffer, status: {}", status);
+            }
+        };
+    }
+
+    std::function<void()> m_unmap;
+};
+
+Expected<DmaMappedBuffer> DmaMappedBuffer::create(VDevice &vdevice, void *user_address, size_t size,
+    hailo_dma_buffer_direction_t direction) {
+
+    hailo_status status = HAILO_UNINITIALIZED;
+    std::unique_ptr<Impl> impl(new (std::nothrow) Impl(vdevice, user_address, size, direction, status));
+    CHECK_NOT_NULL_AS_EXPECTED(impl, HAILO_OUT_OF_HOST_MEMORY);
+    CHECK_SUCCESS_AS_EXPECTED(status);
+
+    return Expected<DmaMappedBuffer>(DmaMappedBuffer{std::move(impl)});
+}
+
+Expected<DmaMappedBuffer> DmaMappedBuffer::create(Device &device, void *user_address, size_t size,
+    hailo_dma_buffer_direction_t direction) {
+
+    hailo_status status = HAILO_UNINITIALIZED;
+    std::unique_ptr<Impl> impl(new (std::nothrow) Impl(device, user_address, size, direction, status));
+    CHECK_NOT_NULL_AS_EXPECTED(impl, HAILO_OUT_OF_HOST_MEMORY);
+    CHECK_SUCCESS_AS_EXPECTED(status);
+
+    return Expected<DmaMappedBuffer>(DmaMappedBuffer{std::move(impl)});
+}
+
+// Defined in cpp since Impl definition is needed.
+DmaMappedBuffer::~DmaMappedBuffer() = default;
+DmaMappedBuffer::DmaMappedBuffer(DmaMappedBuffer &&) = default;
+DmaMappedBuffer &DmaMappedBuffer::operator=(DmaMappedBuffer &&) = default;
+
+DmaMappedBuffer::DmaMappedBuffer(std::unique_ptr<Impl> impl) :
+    m_impl(std::move(impl))
+{}
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/os/posix/hailort_driver.cpp b/hailort/libhailort/src/vdma/driver/hailort_driver.cpp
similarity index 62%
rename from hailort/libhailort/src/os/posix/hailort_driver.cpp
rename to hailort/libhailort/src/vdma/driver/hailort_driver.cpp
index c3720fc8..36f0c204 100755
--- a/hailort/libhailort/src/os/posix/hailort_driver.cpp
+++ b/hailort/libhailort/src/vdma/driver/hailort_driver.cpp
@@ -1,22 +1,29 @@
-#include "os/hailort_driver.hpp"
-#include "os/driver_scan.hpp"
-#include "hailo_ioctl_common.h"
+/**
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file hailort_driver.cpp + * @brief Low level interface to PCI driver + **/ + +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/driver/os/driver_os_specific.hpp" + #include "common/logger_macros.hpp" #include "common/utils.hpp" +#include "hailo_ioctl_common.h" -#include -#include +#if defined(__linux__) #include -#include -#include -#include +#elif defined(__QNX__) #include -#include -#include -#include -#include -#include -#include +#include +#elif defined(_WIN32) +#pragma comment(lib, "cfgmgr32.lib") +#else +#error "unsupported platform!" +#endif namespace hailort { @@ -24,6 +31,20 @@ namespace hailort static_assert(VDMA_CHANNELS_PER_ENGINE == MAX_VDMA_CHANNELS_PER_ENGINE, "Driver and libhailort parameters mismatch"); static_assert(MAX_VDMA_ENGINES == MAX_VDMA_ENGINES_COUNT, "Driver and libhailort parameters mismatch"); static_assert(MIN_D2H_CHANNEL_INDEX == VDMA_DEST_CHANNELS_START, "Driver and libhailort parameters mismatch"); +static_assert(ONGOING_TRANSFERS_SIZE == HAILO_VDMA_MAX_ONGOING_TRANSFERS, "Driver and libhailort parameters mismatch"); +static_assert(MAX_IRQ_TIMESTAMPS_SIZE == CHANNEL_IRQ_TIMESTAMPS_SIZE, "Driver and libhailort parameters mismatch"); + +static_assert(static_cast(InterruptsDomain::NONE) == HAILO_VDMA_INTERRUPTS_DOMAIN_NONE, "Driver and libhailort parameters mismatch"); +static_assert(static_cast(InterruptsDomain::HOST) == HAILO_VDMA_INTERRUPTS_DOMAIN_HOST, "Driver and libhailort parameters mismatch"); +static_assert(static_cast(InterruptsDomain::DEVICE) == HAILO_VDMA_INTERRUPTS_DOMAIN_DEVICE, "Driver and libhailort parameters mismatch"); +static_assert(static_cast(InterruptsDomain::BOTH) == + (HAILO_VDMA_INTERRUPTS_DOMAIN_DEVICE | HAILO_VDMA_INTERRUPTS_DOMAIN_HOST), "Driver and libhailort parameters mismatch"); + + +#define CHECK_IOCTL_RESULT(err, message) do { \ + auto __err = (err); \ + CHECK(0 == __err, HAILO_DRIVER_FAIL, message " errno: {}", __err); \ + } while (0) static hailo_dma_data_direction direction_to_dma_data_direction(HailoRTDriver::DmaDirection direction) { switch (direction) { @@ -41,7 +62,7 @@ static hailo_dma_data_direction direction_to_dma_data_direction(HailoRTDriver::D } static enum hailo_cpu_id translate_cpu_id(hailo_cpu_id_t cpu_id) -{ +{ switch (cpu_id) { case HAILO_CPU_ID_0: return HAILO_CPU_ID_CPU0; @@ -87,31 +108,22 @@ static hailo_transfer_memory_type translate_memory_type(HailoRTDriver::MemoryTyp return HAILO_TRANSFER_MEMORY_MAX_ENUM; } -static Expected create_interrupt_timestamp_list( - hailo_vdma_interrupts_read_timestamp_params &inter_data) -{ - CHECK_AS_EXPECTED(inter_data.timestamps_count <= MAX_IRQ_TIMESTAMPS_SIZE, HAILO_DRIVER_FAIL, - "Invalid channel interrupts timestamps count returned {}", inter_data.timestamps_count); - ChannelInterruptTimestampList timestamp_list{}; - - timestamp_list.count = inter_data.timestamps_count; - for (size_t i = 0; i < timestamp_list.count; i++) { - timestamp_list.timestamp_list[i].timestamp = std::chrono::nanoseconds(inter_data.timestamps[i].timestamp_ns); - timestamp_list.timestamp_list[i].desc_num_processed = inter_data.timestamps[i].desc_num_processed; - } - return timestamp_list; -} - // TODO: validate wraparounds for buffer/mapping handles in the driver (HRT-9509) const uintptr_t HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; const size_t HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; const uint8_t 
HailoRTDriver::INVALID_VDMA_CHANNEL_INDEX = INVALID_VDMA_CHANNEL; +#if defined(__linux__) || defined(_WIN32) +const vdma_mapped_buffer_driver_identifier HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER = INVALID_DRIVER_HANDLE_VALUE; +#elif __QNX__ +const vdma_mapped_buffer_driver_identifier HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER = -1; +#else +#error "unsupported platform!" +#endif + Expected> HailoRTDriver::create(const DeviceInfo &device_info) { - auto fd = FileDescriptor(open(device_info.dev_path.c_str(), O_RDWR)); - CHECK_AS_EXPECTED(fd >= 0, HAILO_DRIVER_FAIL, - "Failed to open device file {} with error {}", device_info.dev_path, errno); + TRY(auto fd, open_device_file(device_info.dev_path)); hailo_status status = HAILO_UNINITIALIZED; std::unique_ptr driver(new (std::nothrow) HailoRTDriver(device_info, std::move(fd), status)); @@ -121,45 +133,6 @@ Expected> HailoRTDriver::create(const DeviceInfo return driver; } -#if defined(__linux__) -static bool is_blocking_ioctl(unsigned long request) -{ - switch (request) { - case HAILO_VDMA_INTERRUPTS_WAIT: - case HAILO_FW_CONTROL: - case HAILO_READ_NOTIFICATION: - return true; - default: - return false; - } -} - -hailo_status HailoRTDriver::hailo_ioctl(int fd, unsigned long request, void* request_struct, int &error_status) -{ - // We lock m_driver lock on all request but the blocking onces. Read m_driver_lock doc in the header - std::unique_lock lock; - if (!is_blocking_ioctl(request)) { - lock = std::unique_lock(m_driver_lock); - } - - int res = ioctl(fd, request, request_struct); - error_status = errno; - return (res >= 0) ? HAILO_SUCCESS : HAILO_DRIVER_FAIL; -} -#elif defined(__QNX__) -hailo_status HailoRTDriver::hailo_ioctl(int fd, unsigned long request, void* request_struct, int &error_status) -{ - int res = ioctl(fd, static_cast(request), request_struct); - if (0 > res) { - error_status = -res; - return HAILO_DRIVER_FAIL; - } - return HAILO_SUCCESS; -} -#else -#error "Unsupported platform" -#endif - static hailo_status validate_driver_version(const hailo_driver_info &driver_info) { hailo_version_t library_version{}; @@ -179,24 +152,24 @@ HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, m_device_info(device_info), m_allocate_driver_buffer(false) { - hailo_driver_info driver_info = {}; - int err = 0; - if (HAILO_SUCCESS != (status = hailo_ioctl(m_fd, HAILO_QUERY_DRIVER_INFO, &driver_info, err))) { - LOGGER__ERROR("Failed query driver info, errno {}", err); + hailo_driver_info driver_info{}; + auto err = run_ioctl(HAILO_QUERY_DRIVER_INFO, &driver_info); + if (0 != err) { + LOGGER__ERROR("Failed to query driver info, errno {}", err); + status = HAILO_DRIVER_FAIL; return; } - LOGGER__INFO("Hailo PCIe driver version {}.{}.{}", driver_info.major_version, - driver_info.minor_version, driver_info.revision_version); - status = validate_driver_version(driver_info); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Driver version mismatch, status {}", status); return; } - hailo_device_properties device_properties = {}; - if (HAILO_SUCCESS != (status = hailo_ioctl(m_fd, HAILO_QUERY_DEVICE_PROPERTIES, &device_properties, err))) { + hailo_device_properties device_properties{}; + err = run_ioctl(HAILO_QUERY_DEVICE_PROPERTIES, &device_properties); + if (0 != err) { LOGGER__ERROR("Failed query pcie device properties, errno {}", err); + status = HAILO_DRIVER_FAIL; return; } @@ -226,34 +199,16 @@ HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, status = HAILO_SUCCESS; } 
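The constructor above refuses to proceed on a library/driver version mismatch before issuing any other ioctl. A minimal sketch of what validate_driver_version() checks; the exact compatibility policy (matching major/minor) and the HAILO_INVALID_DRIVER_VERSION status are assumptions here:

```cpp
// Sketch: libhailort and the kernel driver are versioned together, so reject a
// mismatching major/minor pair up front. Illustrative, not the verbatim implementation.
static hailo_status validate_driver_version_sketch(const hailo_driver_info &driver_info)
{
    hailo_version_t library_version{};
    auto status = hailo_get_library_version(&library_version);
    CHECK_SUCCESS(status);

    CHECK((driver_info.major_version == library_version.major) &&
          (driver_info.minor_version == library_version.minor),
        HAILO_INVALID_DRIVER_VERSION,
        "Driver version ({}.{}.{}) is different from library version ({}.{}.{})",
        driver_info.major_version, driver_info.minor_version, driver_info.revision_version,
        library_version.major, library_version.minor, library_version.revision);

    return HAILO_SUCCESS;
}
```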
-Expected> HailoRTDriver::read_notification() -{ - hailo_d2h_notification notification_buffer = {}; - - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_READ_NOTIFICATION, ¬ification_buffer, err); - if (HAILO_SUCCESS != status) { - return make_unexpected(HAILO_DRIVER_FAIL); - } - - std::vector notification(notification_buffer.buffer_len); - memcpy(notification.data(), notification_buffer.buffer, notification_buffer.buffer_len); - return notification; -} - -hailo_status HailoRTDriver::disable_notifications() +HailoRTDriver::~HailoRTDriver() { - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_DISABLE_NOTIFICATION, 0, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HAILO_DISABLE_NOTIFICATION failed with errno: {}", err); - return HAILO_DRIVER_FAIL; + for (const auto &buffer_info : m_mapped_buffer) { + auto status = vdma_buffer_unmap_ioctl(buffer_info.handle); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to unmap buffer handle {} status {}", buffer_info.handle, status); + } } - - return HAILO_SUCCESS; } -#if defined(__linux__) Expected> HailoRTDriver::scan_devices() { auto device_names = list_devices(); @@ -267,74 +222,6 @@ Expected> HailoRTDriver::scan_devices() } return devices_info; } -#elif defined(__QNX__) -Expected> HailoRTDriver::scan_devices() -{ - auto device_names = list_devices(); - CHECK_EXPECTED(device_names, "Failed listing pcie devices"); - - // TODO: HRT-6785 - support multiple devices - currently device_names is vector of one device - in future will be multiple - std::vector devices_info; - uint32_t index = 0; - for (const auto &device_name : device_names.value()) { - auto device_info = query_device_info(device_name, index); - CHECK_EXPECTED(device_info, "failed parsing device info for {}", device_name); - devices_info.push_back(device_info.release()); - index++; - } - return devices_info; -} -#else -static_assert(true, "Error, Unsupported Platform"); -#endif //defined (__linux__) - -Expected HailoRTDriver::read_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, - size_t offset, size_t reg_size) -{ - CHECK_AS_EXPECTED(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); - CHECK_AS_EXPECTED(data_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Invalid direction given"); - hailo_vdma_channel_read_register_params params = { - .engine_index = channel_id.engine_index, - .channel_index = channel_id.channel_index, - .direction = direction_to_dma_data_direction(data_direction), - .offset = offset, - .reg_size = reg_size, - .data = 0 - }; - - int err = 0; - auto status = hailo_ioctl(m_fd, HAILO_VDMA_CHANNEL_READ_REGISTER, ¶ms, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HailoRTDriver::read_vdma_channel_register failed with errno:{}", err); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return std::move(params.data); -} - -hailo_status HailoRTDriver::write_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, - size_t offset, size_t reg_size, uint32_t data) -{ - CHECK(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); - CHECK(data_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Invalid direction given"); - hailo_vdma_channel_write_register_params params = { - .engine_index = channel_id.engine_index, - .channel_index = channel_id.channel_index, - .direction = direction_to_dma_data_direction(data_direction), - .offset = offset, - .reg_size = reg_size, - .data = 
data - }; - - int err = 0; - auto status = hailo_ioctl(m_fd, HAILO_VDMA_CHANNEL_WRITE_REGISTER, ¶ms, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HailoRTDriver::write_vdma_channel_register failed with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} hailo_status HailoRTDriver::read_memory(MemoryType memory_type, uint64_t address, void *buf, size_t size) { @@ -386,97 +273,6 @@ hailo_status HailoRTDriver::write_memory(MemoryType memory_type, uint64_t addres return HAILO_SUCCESS; } -hailo_status HailoRTDriver::read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size) -{ - hailo_memory_transfer_params transfer = { - .transfer_direction = TRANSFER_READ, - .memory_type = translate_memory_type(memory_type), - .address = address, - .count = size, - .buffer = {0} - }; - - if (m_dma_type == DmaType::PCIE) { - CHECK(address < std::numeric_limits::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address); - } - - if (size > sizeof(transfer.buffer)) { - LOGGER__ERROR("Invalid size to read, size given {} is larger than max size {}", size, sizeof(transfer.buffer)); - return HAILO_INVALID_ARGUMENT; - } - - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_MEMORY_TRANSFER, &transfer, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HailoRTDriver::read_memory failed with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - - memcpy(buf, transfer.buffer, transfer.count); - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size) -{ - hailo_memory_transfer_params transfer = { - .transfer_direction = TRANSFER_WRITE, - .memory_type = translate_memory_type(memory_type), - .address = address, - .count = size, - .buffer = {0} - }; - - if (m_dma_type == DmaType::PCIE) { - CHECK(address < std::numeric_limits::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address); - } - - if (size > sizeof(transfer.buffer)) { - LOGGER__ERROR("Invalid size to read, size given {} is larger than max size {}", size, sizeof(transfer.buffer)); - return HAILO_INVALID_ARGUMENT; - } - - memcpy(transfer.buffer, buf, transfer.count); - - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_MEMORY_TRANSFER, &transfer, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HailoRTDriver::write_memory failed with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction, - size_t offset, size_t count) -{ -#if defined(__linux__) - hailo_vdma_buffer_sync_params sync_info{ - .handle = handle, - .sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE, - .offset = offset, - .count = count - }; - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_SYNC, &sync_info, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HAILO_VDMA_BUFFER_SYNC failed with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - return HAILO_SUCCESS; -// TODO: HRT-6717 - Remove ifdef when Implement sync ioctl (if determined needed in qnx) -#elif defined( __QNX__) - (void) handle; - (void) sync_direction; - (void) offset; - (void) count; - return HAILO_SUCCESS; -#else -#error "unsupported platform!" 
-#endif // __linux__
-}
-
 hailo_status HailoRTDriver::vdma_interrupts_enable(const ChannelsBitmap &channels_bitmap, bool enable_timestamps_measure)
 {
     CHECK(is_valid_channels_bitmap(channels_bitmap), HAILO_INVALID_ARGUMENT, "Invalid channel bitmap given");
@@ -484,10 +280,7 @@ hailo_status HailoRTDriver::vdma_interrupts_enable(const ChannelsBitmap &channel
     std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine);
     params.enable_timestamps_measure = enable_timestamps_measure;
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_ENABLE, &params, err);
-    CHECK_SUCCESS(status, "Failed to enable vdma interrupts with errno:{}", err);
-
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_INTERRUPTS_ENABLE, &params), "Failed to enable vdma interrupts");
     return HAILO_SUCCESS;
 }
 
@@ -497,16 +290,25 @@ hailo_status HailoRTDriver::vdma_interrupts_disable(const ChannelsBitmap &channe
     hailo_vdma_interrupts_disable_params params{};
     std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine);
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_DISABLE, &params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to disable vdma interrupts with errno:{}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_INTERRUPTS_DISABLE, &params), "Failed to disable vdma interrupts");
     return HAILO_SUCCESS;
 }
 
+static Expected<ChannelInterruptTimestampList> create_interrupt_timestamp_list(
+    hailo_vdma_interrupts_read_timestamp_params &inter_data)
+{
+    CHECK_AS_EXPECTED(inter_data.timestamps_count <= MAX_IRQ_TIMESTAMPS_SIZE, HAILO_DRIVER_FAIL,
+        "Invalid channel interrupts timestamps count returned {}", inter_data.timestamps_count);
+    ChannelInterruptTimestampList timestamp_list{};
+
+    timestamp_list.count = inter_data.timestamps_count;
+    for (size_t i = 0; i < timestamp_list.count; i++) {
+        timestamp_list.timestamp_list[i].timestamp = std::chrono::nanoseconds(inter_data.timestamps[i].timestamp_ns);
+        timestamp_list.timestamp_list[i].desc_num_processed = inter_data.timestamps[i].desc_num_processed;
+    }
+    return timestamp_list;
+}
+
 static Expected<IrqData> to_irq_data(const hailo_vdma_interrupts_wait_params& params,
     uint8_t engines_count)
 {
@@ -530,6 +332,7 @@ static Expected<IrqData> to_irq_data(const hailo_vdma_interrupts_wait_params& pa
         irq.channels_irq_data[i].desc_num_processed = params.irq_data[i].host_num_processed;
         irq.channels_irq_data[i].host_error = params.irq_data[i].host_error;
         irq.channels_irq_data[i].device_error = params.irq_data[i].device_error;
+        irq.channels_irq_data[i].validation_success = params.irq_data[i].validation_success;
     }
     return irq;
 }
 
@@ -540,27 +343,40 @@ Expected<IrqData> HailoRTDriver::vdma_interrupts_wait(const ChannelsBitmap &chan
     hailo_vdma_interrupts_wait_params params{};
     std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine);
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_WAIT, &params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to wait vdma interrupts with errno:{}", err);
-        return make_unexpected(HAILO_DRIVER_FAIL);
-    }
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_INTERRUPTS_WAIT, &params), "Failed wait vdma interrupts");
 
     return to_irq_data(params, static_cast<uint8_t>(m_dma_engines_count));
 }
 
 Expected<ChannelInterruptTimestampList> HailoRTDriver::vdma_interrupts_read_timestamps(vdma::ChannelId channel_id)
 {
-    hailo_vdma_interrupts_read_timestamp_params data{};
-    data.engine_index = channel_id.engine_index;
-    data.channel_index = channel_id.channel_index;
+
hailo_vdma_interrupts_read_timestamp_params params{}; + params.engine_index = channel_id.engine_index; + params.channel_index = channel_id.channel_index; - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS, &data, err); - CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS, ¶ms), "Failed read vdma interrupts timestamps"); + + return create_interrupt_timestamp_list(params); +} + +Expected> HailoRTDriver::read_notification() +{ + hailo_d2h_notification notification_buffer{}; + int result = run_ioctl(HAILO_READ_NOTIFICATION, ¬ification_buffer); + if (result != 0) { + LOGGER__DEBUG("Failed read notification, errno {}", result); + return make_unexpected(HAILO_DRIVER_FAIL); + } + + std::vector notification(notification_buffer.buffer_len); + memcpy(notification.data(), notification_buffer.buffer, notification_buffer.buffer_len); + return notification; +} - return create_interrupt_timestamp_list(data); +hailo_status HailoRTDriver::disable_notifications() +{ + CHECK_IOCTL_RESULT(run_ioctl(HAILO_DISABLE_NOTIFICATION, nullptr), "Failed disable notifications"); + return HAILO_SUCCESS; } hailo_status HailoRTDriver::fw_control(const void *request, size_t request_len, const uint8_t request_md5[PCIE_EXPECTED_MD5_LENGTH], @@ -581,15 +397,12 @@ hailo_status HailoRTDriver::fw_control(const void *request, size_t request_len, memcpy(&command.buffer, request, request_len); command.timeout_ms = static_cast(timeout.count()); command.cpu_id = translate_cpu_id(cpu_id); - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_FW_CONTROL, &command, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HAILO_FW_CONTROL failed with errno:{}", err); - return HAILO_FW_CONTROL_FAILURE; - } + + auto result = run_ioctl(HAILO_FW_CONTROL, &command); + CHECK(result == 0, HAILO_FW_CONTROL_FAILURE, "Failed in fw_control, errno:{}", result); if (*response_len < command.buffer_len) { - LOGGER__ERROR("FW control response len needs to be atleast {} (size given {})", command.buffer_len, *response_len); + LOGGER__ERROR("FW control response len needs to be at least {} (size given {})", command.buffer_len, *response_len); *response_len = command.buffer_len; return HAILO_INSUFFICIENT_BUFFER; } @@ -605,22 +418,15 @@ hailo_status HailoRTDriver::read_log(uint8_t *buffer, size_t buffer_size, size_t CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(read_bytes); - hailo_read_log_params params { - .cpu_id = translate_cpu_id(cpu_id), - .buffer = {0}, - .buffer_size = buffer_size, - .read_bytes = 0 - }; + hailo_read_log_params params{}; + params.cpu_id = translate_cpu_id(cpu_id); + params.buffer_size = buffer_size; + params.read_bytes = 0; CHECK(buffer_size <= sizeof(params.buffer), HAILO_DRIVER_FAIL, "Given buffer size {} is bigger than buffer size used to read logs {}", buffer_size, sizeof(params.buffer)); - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_READ_LOG, ¶ms, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to read log with errno:{}", err); - return HAILO_DRIVER_FAIL; - } + CHECK_IOCTL_RESULT(run_ioctl(HAILO_READ_LOG, ¶ms), "Failed to read fw log"); CHECK(params.read_bytes <= sizeof(params.buffer), HAILO_DRIVER_FAIL, "Amount of bytes read from log {} is bigger than size of buffer {}", params.read_bytes, sizeof(params.buffer)); @@ -630,163 +436,472 @@ hailo_status HailoRTDriver::read_log(uint8_t *buffer, size_t buffer_size, size_t return HAILO_SUCCESS; } - + hailo_status HailoRTDriver::reset_nn_core() { - int err = 
0; - auto status = hailo_ioctl(this->m_fd, HAILO_RESET_NN_CORE, nullptr, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to reset nn core with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - + CHECK_IOCTL_RESULT(run_ioctl(HAILO_RESET_NN_CORE, nullptr), "Failed reset nn_core"); return HAILO_SUCCESS; } -#if defined(__linux__) Expected HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size, - DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) -{ - hailo_vdma_buffer_map_params map_user_buffer_info { - .user_address = user_address, - .size = required_size, - .data_direction = direction_to_dma_data_direction(data_direction), - .allocated_buffer_handle = driver_buff_handle, - .mapped_handle = 0 - }; - - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to map user buffer with errno:{}", err); - return make_unexpected(HAILO_DRIVER_FAIL); + DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) { + + std::unique_lock mapping_lock(m_mapped_buffer_lock); + auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), + [user_address, required_size, data_direction](const auto& mapped_buffer_info) { + return (mapped_buffer_info.address == user_address) && + (mapped_buffer_info.size == required_size) && + (mapped_buffer_info.direction == data_direction); + }); + if (mapped_buffer != m_mapped_buffer.end()) { + // Buffer already mapped, increase ref count and use it. + assert(mapped_buffer->mapped_count > 0); + CHECK_AS_EXPECTED(mapped_buffer->driver_buff_handle == driver_buff_handle, HAILO_INVALID_ARGUMENT, + "Mapped buffer driver handle {} is different than required handle {}", mapped_buffer->driver_buff_handle, + driver_buff_handle); + + mapped_buffer->mapped_count++; + return Expected(mapped_buffer->handle); + } else { + // Buffer not mapped, map it now + auto handle = vdma_buffer_map_ioctl(user_address, required_size, data_direction, driver_buff_handle); + CHECK_EXPECTED(handle); + + const auto mapping_count = 1; + m_mapped_buffer.emplace_back(MappedBufferInfo { + handle.value(), + user_address, + data_direction, + required_size, + driver_buff_handle, + mapping_count + }); + + return handle.release(); } +} - return VdmaBufferHandle(map_user_buffer_info.mapped_handle); +hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) { + std::unique_lock mapping_lock(m_mapped_buffer_lock); + auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), + [handle](const auto& mapped_buffer_info) { + return mapped_buffer_info.handle == handle; + }); + CHECK(mapped_buffer != m_mapped_buffer.end(), HAILO_NOT_FOUND, "Mapped buffer handle {} not found", handle); + + assert(mapped_buffer->mapped_count > 0); + mapped_buffer->mapped_count--; + if (mapped_buffer->mapped_count == 0) { + m_mapped_buffer.erase(mapped_buffer); + return vdma_buffer_unmap_ioctl(handle); + } + return HAILO_SUCCESS; } -#elif defined( __QNX__) -Expected HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size, - DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) + +hailo_status HailoRTDriver::vdma_buffer_unmap(void *user_address, size_t size, DmaDirection data_direction) { - // Mapping is done by the driver_buff_handle (shm file descriptor), and not by address. 
- (void)user_address; + std::unique_lock mapping_lock(m_mapped_buffer_lock); + auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), + [user_address, size, data_direction](const auto& mapped_buffer_info) { + return (mapped_buffer_info.address == user_address) && + (mapped_buffer_info.size == size) && + (mapped_buffer_info.direction == data_direction); + }); + CHECK(mapped_buffer != m_mapped_buffer.end(), HAILO_NOT_FOUND, "Mapped buffer {} {} not found", + user_address, size); + + assert(mapped_buffer->mapped_count > 0); + mapped_buffer->mapped_count--; + if (mapped_buffer->mapped_count == 0) { + const auto handle = mapped_buffer->handle; + m_mapped_buffer.erase(mapped_buffer); + return vdma_buffer_unmap_ioctl(handle); + } + return HAILO_SUCCESS; +} - // Create shared memory handle to send to driver - shm_handle_t shm_handle; - int err = shm_create_handle(driver_buff_handle, m_resource_manager_pid, O_RDWR, - &shm_handle, 0); - if (0 != err) { - LOGGER__ERROR("Error creating shm object handle, errno is: {}", errno); - return make_unexpected(HAILO_INTERNAL_FAILURE); +hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction, + size_t offset, size_t count) +{ +#ifndef __QNX__ + hailo_vdma_buffer_sync_params sync_info{}; + sync_info.handle = handle; + sync_info.sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE; + sync_info.offset = offset; + sync_info.count = count; + CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_BUFFER_SYNC, &sync_info), "Failed sync vdma buffer"); + return HAILO_SUCCESS; +// TODO: HRT-6717 - Remove ifdef when Implement sync ioctl (if determined needed in qnx) +#else /* __QNX__ */ + (void) handle; + (void) sync_direction; + (void) offset; + (void) count; + return HAILO_SUCCESS; +#endif +} + +Expected HailoRTDriver::descriptors_list_create(size_t desc_count, uint16_t desc_page_size, + bool is_circular) +{ + uintptr_t desc_handle = INVALID_DRIVER_HANDLE_VALUE; + uint64_t dma_address = 0; + TRY(std::tie(desc_handle, dma_address), + descriptors_list_create_ioctl(desc_count, desc_page_size, is_circular)); + + auto user_address = descriptors_list_create_mmap(desc_handle, desc_count); + if (!user_address) { + auto status = descriptors_list_release_ioctl(desc_handle); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed releasing descriptors list, status {}", status); + // continue + } + return make_unexpected(user_address.status()); } - hailo_vdma_buffer_map_params map_user_buffer_info { - .shared_memory_handle = shm_handle, - .size = required_size, - .data_direction = direction_to_dma_data_direction(data_direction), - .allocated_buffer_handle = INVALID_DRIVER_HANDLE_VALUE, - .mapped_handle = 0 - }; + return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()}; +} - // Note: The driver will accept the shm_handle, and will mmap it to its own address space. After the driver maps the - // the shm, calling shm_delete_handle is not needed (but can't harm on the otherhand). - // If the ioctl fails, we can't tell if the shm was mapped or not, so we delete it ourself. 
- auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to map user buffer with errno:{}", err); - shm_delete_handle(shm_handle); - return make_unexpected(HAILO_DRIVER_FAIL); +hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info) +{ + hailo_status status = HAILO_SUCCESS; + + auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count); + if (HAILO_SUCCESS != unmap_status) { + LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status); + status = unmap_status; + // continue } - return VdmaBufferHandle(map_user_buffer_info.mapped_handle); + auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle); + if (HAILO_SUCCESS != release_status) { + LOGGER__ERROR("Descriptors list release status failed with {}", release_status); + status = release_status; + // continue + } + + return status; } -#else -#error "unsupported platform!" -#endif // __linux__ -hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) +hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle, + size_t buffer_size, size_t buffer_offset, uint8_t channel_index, uint32_t starting_desc) { - hailo_vdma_buffer_unmap_params unmap_user_buffer_info { - .mapped_handle = handle - }; + hailo_desc_list_bind_vdma_buffer_params config_info{}; + config_info.buffer_handle = buffer_handle; + config_info.buffer_size = buffer_size; + config_info.buffer_offset = buffer_offset; + config_info.desc_handle = desc_handle; + config_info.channel_index = channel_index; + config_info.starting_desc = starting_desc; - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_UNMAP, &unmap_user_buffer_info, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to unmap user buffer with errno:{}", err); - return HAILO_DRIVER_FAIL; + CHECK_IOCTL_RESULT(run_ioctl(HAILO_DESC_LIST_BIND_VDMA_BUFFER, &config_info), "Failed bind buffer to desc list"); + return HAILO_SUCCESS; +} + +Expected HailoRTDriver::launch_transfer(vdma::ChannelId channel_id, uintptr_t desc_handle, + uint32_t starting_desc, const std::vector &transfer_buffers, + bool should_bind, InterruptsDomain first_desc_interrupts, InterruptsDomain last_desc_interrupts) +{ + CHECK(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); + CHECK(transfer_buffers.size() <= ARRAY_ENTRIES(hailo_vdma_launch_transfer_params::buffers), HAILO_INVALID_ARGUMENT, + "Invalid transfer buffers size {} given", transfer_buffers.size()); + + hailo_vdma_launch_transfer_params params{}; + params.engine_index = channel_id.engine_index; + params.channel_index = channel_id.channel_index; + params.desc_handle = desc_handle; + params.starting_desc = starting_desc; + params.buffers_count = static_cast(transfer_buffers.size()); + for (size_t i = 0; i < transfer_buffers.size(); i++) { + params.buffers[i].mapped_buffer_handle = transfer_buffers[i].buffer_handle; + params.buffers[i].offset = static_cast(transfer_buffers[i].offset); + params.buffers[i].size = static_cast(transfer_buffers[i].size); } + params.should_bind = should_bind; + params.first_interrupts_domain = (hailo_vdma_interrupts_domain)first_desc_interrupts; + params.last_interrupts_domain = (hailo_vdma_interrupts_domain)last_desc_interrupts; +#ifdef NDEBUG + params.is_debug = false; +#else + 
params.is_debug = true;
+#endif
+
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_LAUNCH_TRANSFER, &params), "Failed launch transfer");
+    return Expected<uint32_t>(params.descs_programed);
+}
+
+#if defined(__linux__)
+Expected<uintptr_t> HailoRTDriver::vdma_low_memory_buffer_alloc(size_t size)
+{
+    hailo_allocate_low_memory_buffer_params params{};
+    params.buffer_size = size;
+    params.buffer_handle = 0;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC, &params), "Failed to allocate buffer");
+
+    return std::move(params.buffer_handle);
+}
+
+hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t buffer_handle)
+{
+    hailo_free_low_memory_buffer_params params{};
+    params.buffer_handle = buffer_handle;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_LOW_MEMORY_BUFFER_FREE, &params), "Failed to free allocated buffer");
+    return HAILO_SUCCESS;
+}
 
-Expected<DescriptorsListInfo> HailoRTDriver::descriptors_list_create(size_t desc_count, bool is_circular)
+Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t size)
 {
-    auto handle_to_dma_address_pair = descriptors_list_create_ioctl(desc_count, is_circular);
-    CHECK_EXPECTED(handle_to_dma_address_pair);
+    auto handle_to_dma_address_pair = continous_buffer_alloc_ioctl(size);
+    if (!handle_to_dma_address_pair) {
+        // Log in continous_buffer_alloc_ioctl
+        return make_unexpected(handle_to_dma_address_pair.status());
+    }
 
     const auto desc_handle = handle_to_dma_address_pair->first;
     const auto dma_address = handle_to_dma_address_pair->second;
 
-    auto user_address = descriptors_list_create_mmap(desc_handle, desc_count);
+    auto user_address = continous_buffer_mmap(desc_handle, size);
     if (!user_address) {
-        auto status = descriptors_list_release_ioctl(desc_handle);
+        auto status = continous_buffer_free_ioctl(desc_handle);
         if (HAILO_SUCCESS != status) {
-            LOGGER__ERROR("Failed releasing descriptors list, status {}", status);
+            LOGGER__ERROR("Failed releasing continuous buffer, status {}", status);
             // continue
         }
         return make_unexpected(user_address.status());
     }
 
-    return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()};
+    return ContinousBufferInfo{desc_handle, dma_address, size, user_address.release()};
 }
 
-hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info)
+hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &buffer_info)
 {
     hailo_status status = HAILO_SUCCESS;
 
-    auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count);
+    auto unmap_status = continous_buffer_munmap(buffer_info.user_address, buffer_info.size);
     if (HAILO_SUCCESS != unmap_status) {
-        LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status);
+        LOGGER__ERROR("Continuous buffer unmap failed with {}", unmap_status);
         status = unmap_status;
         // continue
     }
 
-    auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle);
+    auto release_status = continous_buffer_free_ioctl(buffer_info.handle);
     if (HAILO_SUCCESS != release_status) {
-        LOGGER__ERROR("Descriptors list release status failed with {}", release_status);
+        LOGGER__ERROR("Continuous buffer release failed with {}", release_status);
         status = release_status;
         // continue
     }
 
     return status;
 }
+#elif defined(__QNX__) || defined(_WIN32)
+
+Expected<uintptr_t> HailoRTDriver::vdma_low_memory_buffer_alloc(size_t /* size */)
+{
+    LOGGER__ERROR("Low memory buffer not supported for platform");
+    return make_unexpected(HAILO_NOT_SUPPORTED);
+}
+
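The continuous-buffer pair above (alloc ioctl, then mmap, with the free ioctl run on the mmap error path) is a natural fit for a scope guard. A sketch under these assumptions: the HailoRTDriver reference outlives the guard, and the ContinousBufferInfo field names follow the aggregate initialization above. This is illustrative, not libhailort's actual wrapper:

```cpp
// Sketch: scope-guard a continuous buffer so the free ioctl always runs.
class ContinousBufferGuard final {
public:
    ContinousBufferGuard(HailoRTDriver &driver, ContinousBufferInfo info) :
        m_driver(driver), m_info(info)
    {}

    ~ContinousBufferGuard()
    {
        auto status = m_driver.vdma_continuous_buffer_free(m_info);
        if (HAILO_SUCCESS != status) {
            LOGGER__ERROR("Failed freeing continuous buffer, status {}", status);
        }
    }

    ContinousBufferGuard(const ContinousBufferGuard &) = delete;
    ContinousBufferGuard &operator=(const ContinousBufferGuard &) = delete;

    void *address() const { return m_info.user_address; }
    size_t size() const { return m_info.size; }

private:
    HailoRTDriver &m_driver;
    ContinousBufferInfo m_info;
};

// Usage: TRY(auto info, driver.vdma_continuous_buffer_alloc(size));
//        ContinousBufferGuard guard(driver, info);
```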
+hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t /* buffer_handle */)
+{
+    LOGGER__ERROR("Low memory buffer not supported for platform");
+    return HAILO_NOT_SUPPORTED;
+}
+
+Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t /* size */)
+{
+    LOGGER__ERROR("Continuous buffer not supported for platform");
+    return make_unexpected(HAILO_NOT_SUPPORTED);
+}
+
+hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &/* buffer_info */)
+{
+    LOGGER__ERROR("Continuous buffer not supported for platform");
+    return HAILO_NOT_SUPPORTED;
+}
+
+#else
+#error "unsupported platform!"
+#endif
+
+hailo_status HailoRTDriver::mark_as_used()
+{
+    hailo_mark_as_in_use_params params{};
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_MARK_AS_IN_USE, &params), "Failed mark as used");
+    return params.in_use ? HAILO_DEVICE_IN_USE : HAILO_SUCCESS;
+}
+
+#if defined(__linux__)
+static bool is_blocking_ioctl(unsigned long request)
+{
+    switch (request) {
+    case HAILO_VDMA_INTERRUPTS_WAIT:
+    case HAILO_FW_CONTROL:
+    case HAILO_READ_NOTIFICATION:
+        return true;
+    default:
+        return false;
+    }
+}
+
+template <typename PointerType>
+int HailoRTDriver::run_ioctl(uint32_t ioctl_code, PointerType param)
+{
+    // We take m_driver_lock on all requests but the blocking ones. Read the m_driver_lock doc in the header
+    std::unique_lock<std::mutex> lock;
+    if (!is_blocking_ioctl(ioctl_code)) {
+        lock = std::unique_lock<std::mutex>(m_driver_lock);
+    }
+
+    return run_hailo_ioctl(m_fd, ioctl_code, param);
+}
+#elif defined(__QNX__) || defined(_WIN32)
+
+template <typename PointerType>
+int HailoRTDriver::run_ioctl(uint32_t ioctl_code, PointerType param)
+{
+    return run_hailo_ioctl(m_fd, ioctl_code, param);
+}
+#else
+#error "Unsupported platform"
+#endif
+
+hailo_status HailoRTDriver::read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size)
+{
+    CHECK(size != 0, HAILO_INVALID_ARGUMENT, "Invalid size to read");
+    CHECK(buf != nullptr, HAILO_INVALID_ARGUMENT, "Read buffer pointer is NULL");
+
+    if (m_dma_type == DmaType::PCIE) {
+        CHECK(address < std::numeric_limits<uint32_t>::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address);
+    }
+
+    hailo_memory_transfer_params transfer{};
+    transfer.transfer_direction = TRANSFER_READ;
+    transfer.memory_type = translate_memory_type(memory_type);
+    transfer.address = address;
+    transfer.count = size;
+    memset(transfer.buffer, 0, sizeof(transfer.buffer));
+
+    CHECK(size <= sizeof(transfer.buffer), HAILO_INVALID_ARGUMENT,
+        "Invalid size to read, size given {} is larger than max size {}", size, sizeof(transfer.buffer));
+
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_MEMORY_TRANSFER, &transfer), "Failed read memory");
+
+    memcpy(buf, transfer.buffer, transfer.count);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status HailoRTDriver::write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size)
+{
+    CHECK(size != 0, HAILO_INVALID_ARGUMENT, "Invalid size to write");
+    CHECK(buf != nullptr, HAILO_INVALID_ARGUMENT, "Write buffer pointer is NULL");
+
+    if (m_dma_type == DmaType::PCIE) {
+        CHECK(address < std::numeric_limits<uint32_t>::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address);
+    }
+
+    hailo_memory_transfer_params transfer{};
+    transfer.transfer_direction = TRANSFER_WRITE;
+    transfer.memory_type = translate_memory_type(memory_type);
+    transfer.address = address;
+    transfer.count = size;
+    memset(transfer.buffer, 0, sizeof(transfer.buffer));
+
+    CHECK(size <= sizeof(transfer.buffer), HAILO_INVALID_ARGUMENT,
+        "Invalid size to write, size given {} is larger than max size {}", size, sizeof(transfer.buffer));
+
+    memcpy(transfer.buffer, buf, transfer.count);
+
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_MEMORY_TRANSFER, &transfer), "Failed write memory");
+    return HAILO_SUCCESS;
+}
+
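read_memory_ioctl/write_memory_ioctl above are capped at sizeof(transfer.buffer) bytes per call; the public read_memory/write_memory entry points (partially visible earlier in this diff) split larger requests into chunks. A hedged sketch of that chunking loop, where max_single_transfer stands in for the ioctl buffer size and read_chunk stands in for the private read_memory_ioctl; both are assumptions, not the verbatim implementation:

```cpp
#include <algorithm>
#include <cstdint>
#include <functional>

using ChunkReader = std::function<hailo_status(uint64_t address, void *buf, size_t size)>;

// Sketch: split a large read into ioctl-sized chunks, stopping on the first failure.
hailo_status read_memory_in_chunks(uint64_t address, uint8_t *buf, size_t size,
    size_t max_single_transfer, const ChunkReader &read_chunk)
{
    size_t offset = 0;
    while (offset < size) {
        const size_t chunk = std::min(max_single_transfer, size - offset);
        auto status = read_chunk(address + offset, buf + offset, chunk);
        if (HAILO_SUCCESS != status) {
            return status;
        }
        offset += chunk;
    }
    return HAILO_SUCCESS;
}
```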
sizeof(transfer.buffer)); + + memcpy(transfer.buffer, buf, transfer.count); + + CHECK_IOCTL_RESULT(run_ioctl(HAILO_MEMORY_TRANSFER, &transfer), "Failed write memory"); + return HAILO_SUCCESS; +} + +#if defined(__linux__) || defined(_WIN32) +Expected HailoRTDriver::vdma_buffer_map_ioctl(void *user_address, size_t required_size, + DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) +{ + hailo_vdma_buffer_map_params map_user_buffer_info{}; + map_user_buffer_info.user_address = user_address; + map_user_buffer_info.size = required_size; + map_user_buffer_info.data_direction = direction_to_dma_data_direction(data_direction); + map_user_buffer_info.allocated_buffer_handle = driver_buff_handle; + map_user_buffer_info.mapped_handle = 0; + + CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info), "Failed map vdma buffer"); -Expected> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count, bool is_circular) + return std::move(map_user_buffer_info.mapped_handle); +} +#elif defined(__QNX__) +Expected HailoRTDriver::vdma_buffer_map_ioctl(void *user_address, size_t required_size, + DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) { + // Mapping is done by the driver_buff_handle (shm file descriptor), and not by address. + (void)user_address; + CHECK(driver_buff_handle != INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER, HAILO_NOT_SUPPORTED, + "On QNX only shared-memory buffers are allowed to be mapped"); + + // Create shared memory handle to send to driver + shm_handle_t shm_handle; + int err = shm_create_handle(driver_buff_handle, m_resource_manager_pid, O_RDWR, + &shm_handle, 0); + if (0 != err) { + LOGGER__ERROR("Error creating shm object handle, errno is: {}", errno); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + hailo_vdma_buffer_map_params map_user_buffer_info { + .shared_memory_handle = shm_handle, + .size = required_size, + .data_direction = direction_to_dma_data_direction(data_direction), + .allocated_buffer_handle = INVALID_DRIVER_HANDLE_VALUE, + .mapped_handle = 0 + }; + + // Note: The driver will accept the shm_handle, and will mmap it to its own address space. After the driver maps the + // the shm, calling shm_delete_handle is not needed (but can't harm on the otherhand). + // If the ioctl fails, we can't tell if the shm was mapped or not, so we delete it ourself. + err = run_ioctl(HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info); + if (err != 0) { + LOGGER__ERROR("Failed to map user buffer with errno:{}", err); + shm_delete_handle(shm_handle); + return make_unexpected(HAILO_DRIVER_FAIL); + } + + return VdmaBufferHandle(map_user_buffer_info.mapped_handle); +} +#else +#error "unsupported platform!" 
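A minimal caller-side sketch of how the map/unmap pair above is meant to be used. This is a hypothetical helper, not part of this diff; the public `vdma_buffer_map`/`vdma_buffer_unmap` signatures and the `VdmaBufferHandle` scoping are assumptions based on the private ioctl wrappers:

```cpp
#include <functional>

// Sketch only: map a user buffer for H2D DMA, run the caller's work function,
// and always unmap, reporting the first error encountered.
static hailo_status with_mapped_buffer(hailort::HailoRTDriver &driver, void *user_address, size_t size,
    const std::function<hailo_status(hailort::VdmaBufferHandle)> &work)
{
    auto handle = driver.vdma_buffer_map(user_address, size, hailort::HailoRTDriver::DmaDirection::H2D,
        hailort::HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER);
    CHECK_EXPECTED_AS_STATUS(handle); // propagate mapping failure

    const auto work_status = work(handle.value());

    // Unmap even if the work failed; the first failure wins.
    const auto unmap_status = driver.vdma_buffer_unmap(handle.value());
    return (HAILO_SUCCESS != work_status) ? work_status : unmap_status;
}
```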
+
+hailo_status HailoRTDriver::vdma_buffer_unmap_ioctl(VdmaBufferHandle handle)
+{
+    hailo_vdma_buffer_unmap_params unmap_user_buffer_info{};
+    unmap_user_buffer_info.mapped_handle = handle;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_BUFFER_UNMAP, &unmap_user_buffer_info), "Failed unmap vdma buffer");
+    return HAILO_SUCCESS;
+}
+
+Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count,
+    uint16_t desc_page_size, bool is_circular)
+{
+    CHECK(is_powerof2(desc_page_size), HAILO_INVALID_ARGUMENT, "Invalid desc page size {}", desc_page_size);
+
     hailo_desc_list_create_params create_desc_info{};
     create_desc_info.desc_count = desc_count;
+    create_desc_info.desc_page_size = desc_page_size;
     create_desc_info.is_circular = is_circular;
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_CREATE, &create_desc_info, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to create descriptors list with errno:{}", err);
-        return make_unexpected(HAILO_DRIVER_FAIL);
-    }
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_DESC_LIST_CREATE, &create_desc_info), "Failed create desc list");
 
     return std::make_pair(create_desc_info.desc_handle, create_desc_info.dma_address);
 }
 
 hailo_status HailoRTDriver::descriptors_list_release_ioctl(uintptr_t desc_handle)
 {
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_RELEASE, &desc_handle, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to release descriptors list with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
+    struct hailo_desc_list_release_params params{};
+    params.desc_handle = desc_handle;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_DESC_LIST_RELEASE, &params), "Failed release desc list");
     return HAILO_SUCCESS;
 }
 
@@ -826,12 +941,7 @@ Expected<void *> HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_hand
         .user_address = nullptr,
     };
 
-    int err = 0;
-    auto status = HailoRTDriver::hailo_ioctl(m_fd, HAILO_NON_LINUX_DESC_LIST_MMAP, &map_vdma_list_params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Mmap descriptors list ioctl failed with errno:{}", err);
-        return make_unexpected(HAILO_DRIVER_FAIL);
-    }
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_NON_LINUX_DESC_LIST_MMAP, &map_vdma_list_params), "Failed mmap descriptors list");
 
     void *address = mmap(nullptr, buffer_size, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED | MAP_PHYS, NOFD,
         (off_t)map_vdma_list_params.user_address);
@@ -850,119 +960,37 @@ hailo_status HailoRTDriver::descriptors_list_create_munmap(void *address, size_t
     return HAILO_SUCCESS;
 }
 
-#else
-#error "unsupported platform!"
-#endif
-
-hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle,
-    uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc)
+#elif defined(_WIN32)
+Expected<void *> HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count)
 {
-    hailo_desc_list_bind_vdma_buffer_params config_info;
-    config_info.buffer_handle = buffer_handle;
-    config_info.desc_handle = desc_handle;
-    config_info.desc_page_size = desc_page_size;
-    config_info.channel_index = channel_index;
-    config_info.starting_desc = starting_desc;
-
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_BIND_VDMA_BUFFER, &config_info, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to bind vdma buffer to descriptors list with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
-    return HAILO_SUCCESS;
+    hailo_non_linux_desc_list_mmap_params params{};
+    params.desc_handle = desc_handle;
+    params.size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_NON_LINUX_DESC_LIST_MMAP, &params), "Failed mmap desc list");
+    void *user_address = params.user_address;
+    return user_address;
 }
 
-Expected<uintptr_t> HailoRTDriver::vdma_low_memory_buffer_alloc(size_t size)
+hailo_status HailoRTDriver::descriptors_list_create_munmap(void *, size_t)
 {
-    CHECK_AS_EXPECTED(m_allocate_driver_buffer, HAILO_INVALID_OPERATION,
-        "Tried to allocate buffer from driver even though operation is not supported");
-
-    hailo_allocate_low_memory_buffer_params allocate_params = {
-        .buffer_size = size,
-        .buffer_handle = 0
-    };
-
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC, &allocate_params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to allocate buffer with errno: {}", err);
-        return make_unexpected(HAILO_DRIVER_FAIL);
-    }
-
-    return std::move(allocate_params.buffer_handle);
-}
-
-hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t buffer_handle)
-{
-    CHECK(m_allocate_driver_buffer, HAILO_INVALID_OPERATION,
-        "Tried to free allocated buffer from driver even though operation is not supported");
-
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_LOW_MEMORY_BUFFER_FREE, (void*)buffer_handle, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to free allocated buffer with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
-    return HAILO_SUCCESS;
+    // On Windows, the unmap is done on the release ioctl
+    return HAILO_SUCCESS;
 }
-
+#else
+#error "unsupported platform!"
+#endif
 
 #if defined(__linux__)
-Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t size)
-{
-    auto handle_to_dma_address_pair = continous_buffer_alloc_ioctl(size);
-    if (!handle_to_dma_address_pair) {
-        // Log in continous_buffer_alloc_ioctl
-        return make_unexpected(handle_to_dma_address_pair.status());
-    }
-
-    const auto desc_handle = handle_to_dma_address_pair->first;
-    const auto dma_address = handle_to_dma_address_pair->second;
-
-    auto user_address = continous_buffer_mmap(desc_handle, size);
-    if (!user_address) {
-        auto status = continous_buffer_free_ioctl(desc_handle);
-        if (HAILO_SUCCESS != status) {
-            LOGGER__ERROR("Failed releasing conitnous buffer, status {}", status);
-            // continue
-        }
-        return make_unexpected(user_address.status());
-    }
-
-    return ContinousBufferInfo{desc_handle, dma_address, size, user_address.release()};
-}
-
-hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &buffer_info)
-{
-    hailo_status status = HAILO_SUCCESS;
-
-    auto unmap_status = continous_buffer_munmap(buffer_info.user_address, buffer_info.size);
-    if (HAILO_SUCCESS != unmap_status) {
-        LOGGER__ERROR("Continous buffer list unmap failed with {}", unmap_status);
-        status = unmap_status;
-        // continue
-    }
-
-    auto release_status = continous_buffer_free_ioctl(buffer_info.handle);
-    if (HAILO_SUCCESS != release_status) {
-        LOGGER__ERROR("Continous buffer release status failed with {}", release_status);
-        status = release_status;
-        // continue
-    }
-
-    return status;
-}
 
 Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::continous_buffer_alloc_ioctl(size_t size)
 {
-    hailo_allocate_continuous_buffer_params params { .buffer_size = size, .buffer_handle = 0, .dma_address = 0 };
+    hailo_allocate_continuous_buffer_params params{};
+    params.buffer_size = size;
+    params.buffer_handle = 0;
+    params.dma_address = 0;
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC, &params, err);
-    if (HAILO_SUCCESS != status) {
+    int err = run_ioctl(HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC, &params);
+    if (err != 0) {
         if (ENOMEM == err) {
            LOGGER__WARN("Failed to allocate continuous buffer, size 0x{:x}. This failure means there is not a sufficient amount of CMA memory", size);
@@ -975,15 +1003,11 @@ Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::continous_buffer_alloc_i
     return std::make_pair(params.buffer_handle, params.dma_address);
 }
 
-hailo_status HailoRTDriver::continous_buffer_free_ioctl(uintptr_t desc_handle)
+hailo_status HailoRTDriver::continous_buffer_free_ioctl(uintptr_t buffer_handle)
 {
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_CONTINUOUS_BUFFER_FREE, (void*)desc_handle, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to free continuous buffer with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
+    hailo_free_continuous_buffer_params params{};
+    params.buffer_handle = buffer_handle;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_CONTINUOUS_BUFFER_FREE, &params), "Failed free continuous buffer");
     return HAILO_SUCCESS;
 }
 
@@ -1009,41 +1033,8 @@ hailo_status HailoRTDriver::continous_buffer_munmap(void *address, size_t size)
     return HAILO_SUCCESS;
 }
 
-#elif defined(__QNX__)
-
-Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t /* size */)
-{
-    LOGGER__ERROR("Continous buffer not supported for platform");
-    return make_unexpected(HAILO_NOT_SUPPORTED);
-}
-
-hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &/* buffer_info */)
-{
-    LOGGER__ERROR("Continous buffer not supported for platform");
-    return HAILO_NOT_SUPPORTED;
-}
-
-#else
-#error "unsupported platform!"
 #endif
 
-hailo_status HailoRTDriver::mark_as_used()
-{
-    hailo_mark_as_in_use_params params = {
-        .in_use = false
-    };
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_MARK_AS_IN_USE, &params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to mark device as in use with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-    if (params.in_use) {
-        return HAILO_DEVICE_IN_USE;
-    }
-    return HAILO_SUCCESS;
-}
-
 bool HailoRTDriver::is_valid_channel_id(const vdma::ChannelId &channel_id)
 {
     return (channel_id.engine_index < m_dma_engines_count) && (channel_id.channel_index < MAX_VDMA_CHANNELS_PER_ENGINE);
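The ENOMEM branch above deserves a note: continuous buffers are carved out of the kernel's CMA pool, so allocation can fail even when plenty of ordinary RAM is free. A standalone diagnostic sketch (not part of this diff) for inspecting the pool on Linux, relying only on the standard `CmaFree` field of `/proc/meminfo`:

```cpp
#include <fstream>
#include <iostream>
#include <string>

// Reads CmaFree from /proc/meminfo (present when the kernel is built with CMA).
// Returns the value in kB, or -1 if the field is missing.
static long read_cma_free_kb()
{
    std::ifstream meminfo("/proc/meminfo");
    std::string line;
    while (std::getline(meminfo, line)) {
        if (line.rfind("CmaFree:", 0) == 0) { // line starts with "CmaFree:"
            return std::stol(line.substr(line.find(':') + 1));
        }
    }
    return -1;
}

int main()
{
    std::cout << "CmaFree: " << read_cma_free_kb() << " kB\n";
    return 0;
}
```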
diff --git a/hailort/libhailort/src/os/hailort_driver.hpp b/hailort/libhailort/src/vdma/driver/hailort_driver.hpp
similarity index 80%
rename from hailort/libhailort/src/os/hailort_driver.hpp
rename to hailort/libhailort/src/vdma/driver/hailort_driver.hpp
index 50f242f1..b5f99f92 100755
--- a/hailort/libhailort/src/os/hailort_driver.hpp
+++ b/hailort/libhailort/src/vdma/driver/hailort_driver.hpp
@@ -24,6 +24,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #ifdef __QNX__
 #include
@@ -75,6 +77,7 @@ struct ChannelIrqData {
     uint16_t desc_num_processed;
     uint8_t host_error;
     uint8_t device_error;
+    bool validation_success;
 };
 
 struct IrqData {
@@ -85,7 +88,7 @@ struct IrqData {
 // Bitmap per engine
 using ChannelsBitmap = std::array<uint32_t, MAX_VDMA_ENGINES_COUNT>;
 
-#if defined(__linux__) || defined(_MSC_VER)
+#if defined(__linux__) || defined(_WIN32)
 // Unique handle returned from the driver.
 using vdma_mapped_buffer_driver_identifier = uintptr_t;
 #elif defined(__QNX__)
 using vdma_mapped_buffer_driver_identifier = int;
 #else
 #error "unsupported platform!"
-#endif // defined(__linux__) || defined(_MSC_VER)
+#endif
 
 struct DescriptorsListInfo {
     uintptr_t handle; // Unique identifier for the driver.
@@ -109,6 +112,25 @@ struct ContinousBufferInfo {
     void *user_address;
 };
 
+enum class InterruptsDomain
+{
+    NONE   = 0,
+    DEVICE = 1 << 0,
+    HOST   = 1 << 1,
+    BOTH   = DEVICE | HOST
+};
+
+inline InterruptsDomain operator|(InterruptsDomain a, InterruptsDomain b)
+{
+    return static_cast<InterruptsDomain>(static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
+}
+
+inline InterruptsDomain& operator|=(InterruptsDomain &a, InterruptsDomain b)
+{
+    a = a | b;
+    return a;
+}
+
 class HailoRTDriver final
 {
 public:
@@ -157,23 +179,13 @@ class HailoRTDriver final
 
     static Expected<std::unique_ptr<HailoRTDriver>> create(const DeviceInfo &device_info);
 
-// TODO: HRT-7309 add implementation for Windows
-#if defined(__linux__) || defined(__QNX__)
-    hailo_status hailo_ioctl(int fd, unsigned long request, void* request_struct, int &error_status);
-#endif // defined(__linux__) || defined(__QNX__)
+    ~HailoRTDriver();
 
     static Expected<std::vector<DeviceInfo>> scan_devices();
 
     hailo_status read_memory(MemoryType memory_type, uint64_t address, void *buf, size_t size);
     hailo_status write_memory(MemoryType memory_type, uint64_t address, const void *buf, size_t size);
 
-    Expected<uint32_t> read_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, size_t offset,
-        size_t reg_size);
-    hailo_status write_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, size_t offset,
-        size_t reg_size, uint32_t data);
-
-    hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaSyncDirection sync_direction, size_t offset, size_t count);
-
     hailo_status vdma_interrupts_enable(const ChannelsBitmap &channels_bitmap, bool enable_timestamps_measure);
     hailo_status vdma_interrupts_disable(const ChannelsBitmap &channel_id);
     Expected<IrqData> vdma_interrupts_wait(const ChannelsBitmap &channels_bitmap);
@@ -214,15 +226,20 @@ class HailoRTDriver final
      * Unmaps user buffer mapped using HailoRTDriver::map_buffer.
      */
     hailo_status vdma_buffer_unmap(VdmaBufferHandle handle);
+    hailo_status vdma_buffer_unmap(void *user_address, size_t size, DmaDirection data_direction);
+
+    hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaSyncDirection sync_direction, size_t offset, size_t count);
 
     /**
      * Allocate vdma descriptors list object that can bind to some buffer. Used for scatter gather vdma.
      *
-     * @param[in] desc_count - number of descriptors to allocate. The descriptor max size is DESC_MAX_SIZE.
+     * @param[in] desc_count - number of descriptors to allocate. The descriptor max size is desc_page_size.
+     * @param[in] desc_page_size - maximum size of each descriptor. Must be a power of 2.
     * @param[in] is_circular - if true, the descriptors list can be used in a circular (and desc_count must be power
      *  of 2)
      */
-    Expected<DescriptorsListInfo> descriptors_list_create(size_t desc_count, bool is_circular);
+    Expected<DescriptorsListInfo> descriptors_list_create(size_t desc_count, uint16_t desc_page_size,
+        bool is_circular);
 
     /**
     * Frees a vdma descriptors buffer allocated by 'descriptors_list_create'.
@@ -233,7 +250,21 @@ class HailoRTDriver final
      * Configure vdma channel descriptors to point to the given user address.
      */
     hailo_status descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle,
-        uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc);
+        size_t buffer_size, size_t buffer_offset, uint8_t channel_index,
+        uint32_t starting_desc);
+
+    struct TransferBuffer {
+        VdmaBufferHandle buffer_handle;
+        size_t offset;
+        size_t size;
+    };
+
+    /**
+     * Launches a transfer on the given channel.
+     */
+    Expected<uint32_t> launch_transfer(vdma::ChannelId channel_id, uintptr_t desc_handle,
+        uint32_t starting_desc, const std::vector<TransferBuffer> &transfer_buffer, bool should_bind,
+        InterruptsDomain first_desc_interrupts, InterruptsDomain last_desc_interrupts);
 
     Expected<uintptr_t> vdma_low_memory_buffer_alloc(size_t size);
     hailo_status vdma_low_memory_buffer_free(uintptr_t buffer_handle);
@@ -297,12 +328,21 @@ class HailoRTDriver final
     static const uintptr_t INVALID_DRIVER_BUFFER_HANDLE_VALUE;
     static const size_t INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE;
     static const uint8_t INVALID_VDMA_CHANNEL_INDEX;
+    static const vdma_mapped_buffer_driver_identifier INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER;
 
 private:
+    template<typename PointerType>
+    int run_ioctl(uint32_t ioctl_code, PointerType param);
+
     hailo_status read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size);
     hailo_status write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size);
 
-    Expected<std::pair<uintptr_t, uint64_t>> descriptors_list_create_ioctl(size_t desc_count, bool is_circular);
+    Expected<VdmaBufferHandle> vdma_buffer_map_ioctl(void *user_address, size_t required_size,
+        DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle);
+    hailo_status vdma_buffer_unmap_ioctl(VdmaBufferHandle handle);
+
+    Expected<std::pair<uintptr_t, uint64_t>> descriptors_list_create_ioctl(size_t desc_count, uint16_t desc_page_size,
+        bool is_circular);
     hailo_status descriptors_list_release_ioctl(uintptr_t desc_handle);
     Expected<void *> descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count);
     hailo_status descriptors_list_create_munmap(void *address, size_t desc_count);
@@ -346,6 +386,20 @@ class HailoRTDriver final
     // Need to refactor the driver lock mechanism and then remove the mutex from here.
     std::mutex m_driver_lock;
 #endif
+
+    // TODO HRT-11937: when ioctl is combined, move caching to driver
+    struct MappedBufferInfo {
+        VdmaBufferHandle handle;
+        void *address;
+        DmaDirection direction;
+        size_t size;
+        vdma_mapped_buffer_driver_identifier driver_buff_handle;
+        size_t mapped_count;
+    };
+
+    std::mutex m_mapped_buffer_lock;
+    std::list<MappedBufferInfo> m_mapped_buffer;
+
 };
 
 inline hailo_dma_buffer_direction_t to_hailo_dma_direction(HailoRTDriver::DmaDirection dma_direction)
@@ -356,6 +410,14 @@ inline hailo_dma_buffer_direction_t to_hailo_dma_direction(HailoRTDriver::DmaDir
         HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM;
 }
 
+inline HailoRTDriver::DmaDirection to_hailo_driver_direction(hailo_dma_buffer_direction_t dma_direction)
+{
+    assert(dma_direction <= HAILO_DMA_BUFFER_DIRECTION_BOTH);
+    return (dma_direction == HAILO_DMA_BUFFER_DIRECTION_H2D) ? HailoRTDriver::DmaDirection::H2D :
+        (dma_direction == HAILO_DMA_BUFFER_DIRECTION_D2H) ? HailoRTDriver::DmaDirection::D2H :
+        HailoRTDriver::DmaDirection::BOTH;
+}
+
 } /* namespace hailort */
 
 #endif /* _HAILORT_DRIVER_HPP_ */
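Since `InterruptsDomain` moved into this header (it was previously defined in descriptor_list.hpp, removed further down), a quick illustration of the bit-flag semantics the overloaded operators provide. Sketch only; it assumes the header above is on the include path:

```cpp
#include "vdma/driver/hailort_driver.hpp" // path per this PR

using hailort::InterruptsDomain;

// Combine per-side interrupt requests for the last descriptor of a transfer.
InterruptsDomain last_desc_domain(bool host_irq, bool device_irq)
{
    InterruptsDomain domain = InterruptsDomain::NONE;
    if (host_irq) {
        domain |= InterruptsDomain::HOST;   // host-side (driver) interrupt
    }
    if (device_irq) {
        domain |= InterruptsDomain::DEVICE; // device (firmware) side interrupt
    }
    return domain; // HOST | DEVICE == BOTH, matching the enum definition
}
```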
diff --git a/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp b/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp
new file mode 100644
index 00000000..01a40d66
--- /dev/null
+++ b/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp
@@ -0,0 +1,71 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file driver_os_specific.hpp
+ * @brief Contains functions for the hailort driver that have OS-specific implementations.
+ **/
+
+#ifndef _HAILO_DRIVER_OS_SPECIFIC_HPP_
+#define _HAILO_DRIVER_OS_SPECIFIC_HPP_
+
+#include "hailo/expected.hpp"
+#include "os/file_descriptor.hpp"
+#include "vdma/driver/hailort_driver.hpp"
+
+#ifdef _WIN32
+#include "hailo_ioctl_common.h" // for tCompatibleHailoIoctlData
+#endif
+
+namespace hailort
+{
+
+Expected<FileDescriptor> open_device_file(const std::string &path);
+Expected<std::vector<std::string>> list_devices();
+Expected<HailoRTDriver::DeviceInfo> query_device_info(const std::string &device_name);
+
+#ifndef _WIN32
+
+// Runs the ioctl, returns errno value (or 0 on success)
+int run_hailo_ioctl(underlying_handle_t file, uint32_t ioctl_code, void *param);
+
+#else /* _WIN32 */
+
+/**
+ * On Windows, all IOCTLs share the same structure for input and output (tCompatibleHailoIoctlData).
+ * To keep the Windows and POSIX code paths the same, we need to convert the actual structure type (for example
+ * hailo_memory_transfer_params) to the compatible structure (tCompatibleHailoIoctlData::Buffer::MemoryTransfer).
+ *
+ * This template static class is used to convert to compatible (for input parameters) and from compatible (for output
+ * parameters).
+ */
+template<typename PointerType>
+class WindowsIoctlParamCast final {
+public:
+    static tCompatibleHailoIoctlData to_compatible(PointerType param_ptr);
+    static void from_compatible(const tCompatibleHailoIoctlData& data, PointerType param_ptr);
+};
+
+
+int run_ioctl_compatible_data(underlying_handle_t file, uint32_t ioctl_code, tCompatibleHailoIoctlData& data);
+
+// Runs the ioctl, returns GetLastError() value (or 0 on success)
+template<typename PointerType>
+int run_hailo_ioctl(underlying_handle_t file, uint32_t ioctl_code, PointerType param)
+{
+    static_assert(
+        (std::is_pointer<PointerType>::value) || (std::is_same<PointerType, nullptr_t>::value),
+        "run_ioctl accepts only a pointer or nullptr_t as param");
+
+    tCompatibleHailoIoctlData data = WindowsIoctlParamCast<PointerType>::to_compatible(param);
+    int result = run_ioctl_compatible_data(file, ioctl_code, data);
+    WindowsIoctlParamCast<PointerType>::from_compatible(data, param);
+    return result;
+}
+
+#endif
+
+} /* namespace hailort */
+
+#endif /* _HAILO_DRIVER_OS_SPECIFIC_HPP_ */
diff --git a/hailort/libhailort/src/os/posix/linux/driver_scan.cpp b/hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp
similarity index 76%
rename from hailort/libhailort/src/os/posix/linux/driver_scan.cpp
rename to hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp
index 6ba7daed..f66c6e2c 100644
--- a/hailort/libhailort/src/os/posix/linux/driver_scan.cpp
+++ b/hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp
@@ -3,14 +3,21 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file driver_scan.cpp
- * @brief Parse pcie driver sysfs
+ * @file driver_os_specific.cpp
+ * @brief Implementation for Linux.
**/ -#include "os/driver_scan.hpp" +#include "vdma/driver/os/driver_os_specific.hpp" + +#include "common/utils.hpp" + #include #include #include +#include +#include +#include +#include namespace hailort { @@ -18,6 +25,13 @@ namespace hailort #define HAILO_CLASS_PATH ("/sys/class/hailo_chardev") #define HAILO_BOARD_LOCATION_FILENAME ("board_location") +Expected open_device_file(const std::string &path) +{ + int fd = open(path.c_str(), O_RDWR); + CHECK(fd >= 0, HAILO_DRIVER_FAIL, + "Failed to open device file {} with error {}", path, errno); + return FileDescriptor(fd); +} Expected> list_devices() { @@ -66,4 +80,9 @@ Expected query_device_info(const std::string &device_ return device_info; } +int run_hailo_ioctl(underlying_handle_t file, uint32_t ioctl_code, void *param) { + int res = ioctl(file, ioctl_code, param); + return (res < 0) ? errno : 0; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/qnx/driver_scan.cpp b/hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp similarity index 75% rename from hailort/libhailort/src/os/posix/qnx/driver_scan.cpp rename to hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp index 5c422ebd..99dc2bf0 100644 --- a/hailort/libhailort/src/os/posix/qnx/driver_scan.cpp +++ b/hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp @@ -3,12 +3,17 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file driver_scan.cpp - * @brief Get list and parse pcie driver info + * @file driver_os_specific.cpp + * @brief Implementation for QNX. **/ -#include "os/driver_scan.hpp" +#include "vdma/driver/os/driver_os_specific.hpp" #include +#include +#include +#include +#include + extern "C" { #include } @@ -21,6 +26,14 @@ namespace hailort // Every device name will start with "hailo" #define HAILO_PCIE_DEVICE_NAME_PREFIX ("hailo") +Expected open_device_file(const std::string &path) +{ + int fd = open(path.c_str(), O_RDWR); + CHECK(fd >= 0, HAILO_DRIVER_FAIL, + "Failed to open device file {} with error {}", path, errno); + return FileDescriptor(fd); +} + Expected> list_devices() { DIR *dir_iter = opendir(HAILO_PCIE_CLASS_PATH); @@ -54,11 +67,12 @@ Expected> list_devices() return devices; } -Expected query_device_info(const std::string &device_name, uint32_t index) +Expected query_device_info(const std::string &device_name) { HailoRTDriver::DeviceInfo dev_info = {}; - // pci_device_find finds all relevant devices - find specific using index + // Multiple devices not supported on QNX + const auto index = 0; pci_bdf_t pci_dev = pci_device_find(index, HAILO_VENDOR_ID, PCI_DID_ANY, PCI_CCODE_ANY); if (PCI_BDF_NONE == pci_dev) { LOGGER__ERROR("Error finding relevant device"); @@ -71,4 +85,9 @@ Expected query_device_info(const std::string &device_ return dev_info; } +int run_hailo_ioctl(underlying_handle_t file, uint32_t ioctl_code, void *param) { + int res = ioctl(file, ioctl_code, param); + return (res < 0) ? 
-res : 0;
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/os/windows/driver_scan.cpp b/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
similarity index 54%
rename from hailort/libhailort/src/os/windows/driver_scan.cpp
rename to hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
index cec1bb6c..04e8c239 100644
--- a/hailort/libhailort/src/os/windows/driver_scan.cpp
+++ b/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
@@ -3,16 +3,17 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file driver_scan.cpp
- * @brief Get list and parse pcie driver info
+ * @file driver_os_specific.cpp
+ * @brief Implementation for Windows.
 */
+#include "vdma/driver/os/driver_os_specific.hpp"
+
 #include "os/windows/osdep.hpp"
 #include "common/logger_macros.hpp"
 #include "common/utils.hpp"
 #include "common/os/windows/string_conversion.hpp"
-#include "os/driver_scan.hpp"
-#include "../../../../drivers/win/include/Public.h"
+#include "hailo_ioctl_common.h"
 
 namespace hailort
 {
@@ -140,6 +141,22 @@ CDeviceInterfaceProperty::CDeviceInterfaceProperty(
     PostProcess(cr);
 }
 
+Expected<FileDescriptor> open_device_file(const std::string &dev_path)
+{
+    auto handle = CreateFileA(
+        dev_path.c_str(),
+        GENERIC_READ | GENERIC_WRITE,
+        FILE_SHARE_READ,
+        NULL,
+        OPEN_EXISTING,
+        FILE_FLAG_OVERLAPPED,
+        NULL);
+    CHECK(handle != INVALID_HANDLE_VALUE, HAILO_DRIVER_FAIL, "Failed creating hailo driver file {}, error {}",
+        dev_path, GetLastError());
+
+    return FileDescriptor(handle);
+}
+
 Expected<std::vector<std::string>> list_devices()
 {
     GUID guid = GUID_DEVINTERFACE_HailoKM;
@@ -205,4 +222,85 @@ Expected<HailoRTDriver::DeviceInfo> query_device_info(const std::string &device_
     return device_info;
 }
 
+/**
+ * To reduce boilerplate code, we use the COMPATIBLE_PARAM_CAST macro to generate the template specialization for each
+ * parameter type. The macro accepts the struct type and its member name in the compatible structure.
+ */
+#define COMPATIBLE_PARAM_CAST(ParamType, NameInCompatible) \
+    template<> \
+    tCompatibleHailoIoctlData WindowsIoctlParamCast<ParamType *>::to_compatible(ParamType * param_ptr) { \
+        tCompatibleHailoIoctlData data{}; \
+        data.Buffer.NameInCompatible = *(param_ptr); \
+        return data; \
+    } \
+    \
+    template<> \
+    void WindowsIoctlParamCast<ParamType *>::from_compatible(const tCompatibleHailoIoctlData &data, \
+        ParamType *param_ptr) { \
+        *(param_ptr) = data.Buffer.NameInCompatible; \
+    }
+
+COMPATIBLE_PARAM_CAST(hailo_memory_transfer_params, MemoryTransfer);
+COMPATIBLE_PARAM_CAST(hailo_vdma_interrupts_enable_params, VdmaInterruptsEnable)
+COMPATIBLE_PARAM_CAST(hailo_vdma_interrupts_disable_params, VdmaInterruptsDisable)
+COMPATIBLE_PARAM_CAST(hailo_vdma_interrupts_read_timestamp_params, VdmaInterruptsReadTimestamps)
+COMPATIBLE_PARAM_CAST(hailo_vdma_interrupts_wait_params, VdmaInterruptsWait)
+COMPATIBLE_PARAM_CAST(hailo_vdma_buffer_sync_params, VdmaBufferSync)
+COMPATIBLE_PARAM_CAST(hailo_fw_control, FirmwareControl)
+COMPATIBLE_PARAM_CAST(hailo_vdma_buffer_map_params, VdmaBufferMap)
+COMPATIBLE_PARAM_CAST(hailo_vdma_buffer_unmap_params, VdmaBufferUnmap)
+COMPATIBLE_PARAM_CAST(hailo_desc_list_create_params, DescListCreate)
+COMPATIBLE_PARAM_CAST(hailo_desc_list_release_params, DescListReleaseParam)
+COMPATIBLE_PARAM_CAST(hailo_desc_list_bind_vdma_buffer_params, DescListBind)
+COMPATIBLE_PARAM_CAST(hailo_d2h_notification, D2HNotification)
+COMPATIBLE_PARAM_CAST(hailo_device_properties, DeviceProperties)
+COMPATIBLE_PARAM_CAST(hailo_driver_info, DriverInfo)
+COMPATIBLE_PARAM_CAST(hailo_non_linux_desc_list_mmap_params, DescListMmap)
+COMPATIBLE_PARAM_CAST(hailo_read_log_params, ReadLog)
+COMPATIBLE_PARAM_CAST(hailo_mark_as_in_use_params, MarkAsInUse)
+COMPATIBLE_PARAM_CAST(hailo_vdma_launch_transfer_params, LaunchTransfer)
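For reference, one expansion of the macro written out by hand (this is what the preprocessor generates for `hailo_memory_transfer_params`, modulo the template arguments reconstructed above):

```cpp
// Expansion of COMPATIBLE_PARAM_CAST(hailo_memory_transfer_params, MemoryTransfer):
template<>
tCompatibleHailoIoctlData WindowsIoctlParamCast<hailo_memory_transfer_params *>::to_compatible(
    hailo_memory_transfer_params *param_ptr)
{
    tCompatibleHailoIoctlData data{};
    data.Buffer.MemoryTransfer = *(param_ptr); // copy the POSIX-style struct into the shared buffer
    return data;
}

template<>
void WindowsIoctlParamCast<hailo_memory_transfer_params *>::from_compatible(
    const tCompatibleHailoIoctlData &data, hailo_memory_transfer_params *param_ptr)
{
    *(param_ptr) = data.Buffer.MemoryTransfer; // copy results back out after the ioctl
}
```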
+
+// Special handling for nullptr_t. This case occurs when no parameters are passed.
+template<>
+tCompatibleHailoIoctlData WindowsIoctlParamCast<nullptr_t>::to_compatible(nullptr_t data)
+{
+    (void) data;
+    return tCompatibleHailoIoctlData{};
+}
+
+template<>
+void WindowsIoctlParamCast<nullptr_t>::from_compatible(const tCompatibleHailoIoctlData &compatible, nullptr_t data)
+{
+    (void) compatible;
+    (void) data;
+}
+
+int run_ioctl_compatible_data(underlying_handle_t file, uint32_t ioctl_code, tCompatibleHailoIoctlData& data)
+{
+    data.Parameters.u.value = ioctl_code;
+    FileDescriptor event = CreateEvent(NULL, true, false, NULL);
+    if (event == nullptr) {
+        const auto last_error = GetLastError();
+        LOGGER__ERROR("Failed creating event {}", last_error);
+        return static_cast<int>(last_error);
+    }
+
+    OVERLAPPED overlapped{};
+    RtlZeroMemory(&overlapped, sizeof(overlapped));
+    overlapped.hEvent = event;
+
+    ULONG returned = 0;
+    bool res = DeviceIoControl(file, HAILO_IOCTL_COMPATIBLE, &data, sizeof(data),
+        &data, sizeof(data), &returned, &overlapped);
+    if (!res) {
+        ULONG last_error = GetLastError();
+        if (last_error != ERROR_IO_PENDING) {
+            return static_cast<int>(last_error);
+        }
+        if (!GetOverlappedResult(file, &overlapped, &returned, true)) {
+            return static_cast<int>(GetLastError());
+        }
+    }
+
+    return 0;
+}
+
 } /* namespace hailort */
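Because the device file is opened with FILE_FLAG_OVERLAPPED, every DeviceIoControl may complete asynchronously; the ERROR_IO_PENDING / GetOverlappedResult dance above turns that back into a synchronous call. The same pattern condensed into a standalone sketch (standard Win32 API only, not part of this diff):

```cpp
#include <windows.h>

// Sketch: synchronous DeviceIoControl over a handle opened with FILE_FLAG_OVERLAPPED.
// Returns 0 on success or a Win32 error code, mirroring run_ioctl_compatible_data above.
static ULONG sync_ioctl(HANDLE file, DWORD code, void *buf, DWORD size)
{
    OVERLAPPED overlapped{};
    overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); // manual-reset event
    if (overlapped.hEvent == NULL) {
        return GetLastError();
    }

    DWORD returned = 0;
    ULONG result = 0;
    if (!DeviceIoControl(file, code, buf, size, buf, size, &returned, &overlapped)) {
        const ULONG last_error = GetLastError();
        if (last_error != ERROR_IO_PENDING) {
            result = last_error; // the call failed outright
        } else if (!GetOverlappedResult(file, &overlapped, &returned, TRUE)) {
            result = GetLastError(); // the asynchronous completion failed
        }
    }
    CloseHandle(overlapped.hEvent);
    return result;
}
```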
diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
index 24c0c72b..ec27e22b 100644
--- a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
+++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
@@ -8,6 +8,7 @@
 
 #include "buffer_requirements.hpp"
 #include "vdma/memory/descriptor_list.hpp"
+#include "vdma/memory/continuous_edge_layer.hpp"
 #include "utils.h"
 
 #include
@@ -15,66 +16,43 @@ namespace hailort {
 namespace vdma {
 
-// Minimum size of ccb buffers in descriptors, taken from the CCB spec.
-static constexpr uint32_t MIN_CCB_DESCRIPTORS_COUNT = 16;
-
-Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_requirements_single_transfer(
-    uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size,
-    bool is_circular, const bool force_default_page_size, const bool force_batch_size, const bool is_vdma_aligned_buffer)
-{
-    // First, get the result for the min size
-    auto results = get_sg_buffer_requirements_multiple_transfers(max_desc_page_size, min_batch_size,
-        {transfer_size}, is_circular, force_default_page_size, force_batch_size);
-    CHECK_EXPECTED(results);
-
-    // In order to fetch all descriptors, the amount of active descs is lower by one that the amount
-    // of descs given (Otherwise we won't be able to determine if the buffer is empty or full).
-    // Therefore we add 1 in order to compensate.
-    uint32_t descs_per_transfer = DIV_ROUND_UP(transfer_size, results->desc_page_size());
-    if (!is_vdma_aligned_buffer) {
-        // Add desc for boundary channel because might need extra descriptor for user non aligned buffer async API
-        descs_per_transfer++;
-    }
-    uint32_t descs_count = std::min((descs_per_transfer * max_batch_size) + 1, MAX_DESCS_COUNT);
-    if (is_circular) {
-        descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
-    }
-
-    return BufferSizesRequirements{ descs_count, results->desc_page_size() };
-}
-
-Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers(
-    uint16_t max_desc_page_size, uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes,
-    bool is_circular, const bool force_default_page_size, const bool force_batch_size)
+Expected<BufferSizesRequirements> BufferSizesRequirements::get_buffer_requirements_multiple_transfers(
+    vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t batch_size,
+    const std::vector<uint32_t> &transfer_sizes, bool is_circular, bool force_default_page_size,
+    bool force_batch_size)
 {
-    const uint16_t initial_desc_page_size = find_initial_desc_page_size(transfer_sizes, max_desc_page_size, force_default_page_size);
-
-    CHECK_AS_EXPECTED(max_desc_page_size <= MAX_DESC_PAGE_SIZE, HAILO_INTERNAL_FAILURE,
+    const uint32_t MAX_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MAX_SG_DESCS_COUNT : MAX_CCB_DESCS_COUNT;
+    const uint32_t MIN_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MIN_SG_DESCS_COUNT : MIN_CCB_DESCS_COUNT;
+    const uint16_t MAX_PAGE_SIZE = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MAX_SG_PAGE_SIZE : MAX_CCB_PAGE_SIZE;
+    const uint16_t MIN_PAGE_SIZE = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MIN_SG_PAGE_SIZE : MIN_CCB_PAGE_SIZE;
+
+    const uint16_t initial_desc_page_size = find_initial_desc_page_size(buffer_type, transfer_sizes, max_desc_page_size,
+        force_default_page_size, MIN_PAGE_SIZE);
+
+    CHECK_AS_EXPECTED(max_desc_page_size <= MAX_PAGE_SIZE, HAILO_INTERNAL_FAILURE,
         "max_desc_page_size given {} is bigger than hw max desc page size {}",
-        max_desc_page_size, MAX_DESC_PAGE_SIZE);
-    CHECK_AS_EXPECTED(MIN_DESC_PAGE_SIZE <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
+        max_desc_page_size, MAX_PAGE_SIZE);
+    CHECK_AS_EXPECTED(MIN_PAGE_SIZE <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
         "max_desc_page_size given {} is lower that hw min desc page size {}",
-        max_desc_page_size, MIN_DESC_PAGE_SIZE);
+        max_desc_page_size, MIN_PAGE_SIZE);
 
-    const uint16_t min_desc_page_size = MIN_DESC_PAGE_SIZE;
     CHECK_AS_EXPECTED(initial_desc_page_size <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
         "Initial descriptor page size ({}) is larger than maximum descriptor page size ({})",
         initial_desc_page_size, max_desc_page_size);
-    CHECK_AS_EXPECTED(initial_desc_page_size >= min_desc_page_size, HAILO_INTERNAL_FAILURE,
+    CHECK_AS_EXPECTED(initial_desc_page_size >= MIN_PAGE_SIZE, HAILO_INTERNAL_FAILURE,
         "Initial descriptor page size ({}) is smaller than minimum descriptor page size ({})",
-        initial_desc_page_size, min_desc_page_size);
+        initial_desc_page_size, MIN_PAGE_SIZE);
 
     CHECK_AS_EXPECTED(MAX_DESCS_COUNT >= get_required_descriptor_count(transfer_sizes, max_desc_page_size),
-        HAILO_OUT_OF_DESCRIPTORS,
-        "Network shapes exceeds driver descriptors capabilities."
-        "Minimal descriptors count: {}, max allowed on the driver: {}."
-        "(A common cause for this error could be the large transfer size - which is {}).",
-        get_required_descriptor_count(transfer_sizes, max_desc_page_size), (MAX_DESCS_COUNT - 1),
-        std::accumulate(transfer_sizes.begin(), transfer_sizes.end(), 0));
+        HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
 
     // Defined as uint32_t to prevent overflow (as we multiply it by two in each iteration of the while loop bellow)
-    uint32_t local_desc_page_size = initial_desc_page_size;
+    auto local_desc_page_size = static_cast<uint32_t>(initial_desc_page_size);
 
-    uint32_t descs_count = get_required_descriptor_count(transfer_sizes, initial_desc_page_size);
+    auto descs_count = get_required_descriptor_count(transfer_sizes, initial_desc_page_size);
 
     // Too many descriptors; try a larger desc_page_size which will lead to less descriptors used
     while ((descs_count * batch_size) > (MAX_DESCS_COUNT - 1)) {
         CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size << 1), HAILO_INTERNAL_FAILURE,
@@ -83,11 +61,7 @@ Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_require
 
         if (local_desc_page_size > max_desc_page_size) {
             if (force_batch_size) {
-                LOGGER__ERROR("Network shapes and batch size exceeds driver descriptors capabilities. "
-                    "Required descriptors count: {}, max allowed on the driver: {}. "
-                    "(A common cause for this error could be the batch size - which is {}).",
-                    (batch_size * descs_count), (MAX_DESCS_COUNT - 1), batch_size);
-                return make_unexpected(HAILO_OUT_OF_DESCRIPTORS);
+                return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
             } else {
                 // If not forcing minimum batch (It's acceptable to run infer on lower batch instead of returning error)
                 // once reached over the max page size, stop
@@ -110,44 +84,63 @@ Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_require
         // The length of a descriptor list is always a power of 2. Therefore, on circular buffers the hw will have to
         // access all descriptors.
         descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
-        CHECK_AS_EXPECTED(descs_count <= MAX_DESCS_COUNT, HAILO_OUT_OF_DESCRIPTORS);
+        CHECK_AS_EXPECTED(descs_count <= MAX_DESCS_COUNT, HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
     }
 
     return BufferSizesRequirements{descs_count, desc_page_size};
 }
 
-Expected<BufferSizesRequirements> BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer(uint16_t batch_size,
-    uint32_t transfer_size, bool is_circular)
+Expected<BufferSizesRequirements> BufferSizesRequirements::get_buffer_requirements_single_transfer(
+    vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size,
+    uint32_t transfer_size, bool is_circular, bool force_default_page_size, bool force_batch_size, bool is_vdma_aligned_buffer)
 {
-    const uint16_t desc_page_size = DEFAULT_DESC_PAGE_SIZE;
-    const auto desc_per_transfer = DIV_ROUND_UP(transfer_size, desc_page_size);
-    auto descs_count = desc_per_transfer * batch_size;
-    descs_count = std::max(descs_count, MIN_CCB_DESCRIPTORS_COUNT);
+    const uint32_t MAX_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MAX_SG_DESCS_COUNT : MAX_CCB_DESCS_COUNT;
+    const uint32_t MIN_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MIN_SG_DESCS_COUNT : MIN_CCB_DESCS_COUNT;
+
+    // First, get the result for the min size
+    auto results = get_buffer_requirements_multiple_transfers(buffer_type, max_desc_page_size,
+        min_batch_size, {transfer_size}, is_circular, force_default_page_size, force_batch_size);
+    if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == results.status()) {
+        // In case the requirements can't be met, return without logging an error.
+        return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
+    }
+    CHECK_EXPECTED(results);
+
+    uint32_t descs_per_transfer = DIV_ROUND_UP(transfer_size, results->desc_page_size());
+    if (!is_vdma_aligned_buffer) {
+        // Add a desc for the boundary channel, since the async API might need an extra descriptor for a non-aligned user buffer
+        descs_per_transfer++;
+    }
+
+    // In order to fetch all descriptors, the amount of active descs is lower by one than the amount
+    // of descs given (Otherwise we won't be able to determine if the buffer is empty or full).
+    // Therefore we add 1 in order to compensate.
+    uint32_t descs_count = std::min((descs_per_transfer * max_batch_size) + 1, static_cast<uint32_t>(MAX_DESCS_COUNT));
+    descs_count = std::max(descs_count, MIN_DESCS_COUNT);
     if (is_circular) {
-        // The first 12 channels in D2H CCB ("regular channels") requires that the amount of descriptors will be a power
-        // of 2.
-        // We can optimize it by checking that channel index is one of the last 4 channels ("enhanced channels"), or
-        // even allocate those indexes.
-        // Meanwhile however, we always use power of 2
-        descs_count = get_nearest_powerof_2(descs_count, MIN_CCB_DESCRIPTORS_COUNT);
+        descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
     }
 
-    return BufferSizesRequirements{descs_count, desc_page_size};
+    return BufferSizesRequirements{ descs_count, results->desc_page_size() };
 }
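To make the sizing concrete, a worked example of the arithmetic in get_buffer_requirements_single_transfer under assumed inputs (numbers are illustrative only):

```cpp
// Illustrative arithmetic for a scatter-gather, circular edge layer:
//   transfer_size = 100000 bytes, desc_page_size = 512, max_batch_size = 4,
//   non-vdma-aligned user buffer (async API).
//
// descs_per_transfer = DIV_ROUND_UP(100000, 512) = 196
// +1 descriptor for the possibly non-aligned user buffer     -> 197
// descs_count = 197 * 4 + 1 (extra desc so full != empty)    -> 789
// circular lists must be a power of two                      -> 1024
static_assert(((100000 + 511) / 512) == 196, "DIV_ROUND_UP example");
static_assert(196 + 1 == 197 && 197 * 4 + 1 == 789, "descriptor count example");
```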
-
-uint16_t BufferSizesRequirements::find_initial_desc_page_size(const std::vector<uint32_t> &transfer_sizes,
-    const uint16_t max_desc_page_size, const bool force_default_page_size)
+uint16_t BufferSizesRequirements::find_initial_desc_page_size(
+    vdma::VdmaBuffer::Type buffer_type, const std::vector<uint32_t> &transfer_sizes,
+    uint16_t max_desc_page_size, bool force_default_page_size, uint16_t min_page_size)
 {
-    const uint16_t channel_max_page_size = std::min(DEFAULT_DESC_PAGE_SIZE, max_desc_page_size);
+    static const uint16_t DEFAULT_PAGE_SIZE = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        DEFAULT_SG_PAGE_SIZE : DEFAULT_CCB_PAGE_SIZE;
+    const uint16_t channel_max_page_size = std::min(DEFAULT_PAGE_SIZE, max_desc_page_size);
     const auto max_transfer_size = *std::max_element(transfer_sizes.begin(), transfer_sizes.end());
 
-    // Note: If the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_DESC_PAGE_SIZE
-    // is the optimal value. For transfer_sizes smaller than DEFAULT_DESC_PAGE_SIZE using smaller descriptor page
-    // sizes will save memory consuption without harming performance. In the case of nms for example, only one bbox
-    // is copied from each page. Hence, we'll use MIN_DESC_PAGE_SIZE for nms.
+    // Note: If the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_PAGE_SIZE
+    // is the optimal value. For transfer_sizes smaller than DEFAULT_PAGE_SIZE using smaller descriptor page
+    // sizes will save memory consumption without harming performance. In the case of nms for example, only one bbox
+    // is copied from each page. Hence, we'll use min_page_size for nms.
     const auto optimize_low_page_size = ((channel_max_page_size > max_transfer_size) && !force_default_page_size);
     const uint16_t initial_desc_page_size = optimize_low_page_size ?
-        static_cast<uint16_t>(get_nearest_powerof_2(max_transfer_size, MIN_DESC_PAGE_SIZE)) :
+        static_cast<uint16_t>(get_nearest_powerof_2(max_transfer_size, min_page_size)) :
         channel_max_page_size;
     if (channel_max_page_size != initial_desc_page_size) {
         LOGGER__INFO("Using non-default initial_desc_page_size of {}, due to a small transfer size ({})",
diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
index c709887f..7d1126a8 100644
--- a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
+++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
@@ -12,6 +12,7 @@
 #define _HAILO_BUFFER_REQUIREMENTS_HPP_
 
 #include "hailo/expected.hpp"
+#include "vdma/memory/vdma_edge_layer.hpp"
 
 #include
 #include
 
@@ -35,19 +36,19 @@ class BufferSizesRequirements final {
     uint16_t desc_page_size() const { return m_desc_page_size; }
     uint32_t buffer_size() const { return m_descs_count * m_desc_page_size; }
 
-    static Expected<BufferSizesRequirements> get_sg_buffer_requirements_single_transfer(uint16_t max_desc_page_size,
-        uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, bool is_circular,
-        const bool force_default_page_size, const bool force_batch_size, const bool is_vdma_aligned_buffer);
-    static Expected<BufferSizesRequirements> get_sg_buffer_requirements_multiple_transfers(uint16_t max_desc_page_size,
+    static Expected<BufferSizesRequirements> get_buffer_requirements_multiple_transfers(
+        vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size,
         uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes, bool is_circular,
-        const bool force_default_page_size, const bool force_batch_size);
+        bool force_default_page_size, bool force_batch_size);
 
-    static Expected<BufferSizesRequirements> get_ccb_buffer_requirements_single_transfer(uint16_t batch_size,
-        uint32_t transfer_size, bool is_circular);
+    static Expected<BufferSizesRequirements> get_buffer_requirements_single_transfer(
+        vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size,
+        uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, bool is_circular,
+        bool force_default_page_size, bool force_batch_size, bool is_vdma_aligned_buffer);
 
 private:
-    static uint16_t find_initial_desc_page_size(const std::vector<uint32_t> &transfer_sizes, const uint16_t max_desc_page_size,
-        const bool force_default_page_size);
+    static uint16_t find_initial_desc_page_size(vdma::VdmaBuffer::Type buffer_type, const std::vector<uint32_t> &transfer_sizes,
+        uint16_t max_desc_page_size, bool force_default_page_size, uint16_t min_page_size);
 
     static uint32_t get_required_descriptor_count(const std::vector<uint32_t> &transfer_sizes, uint16_t desc_page_size);
 
     const uint32_t m_descs_count;
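The low-page-size optimization in find_initial_desc_page_size is easiest to see with numbers. A worked example with illustrative values:

```cpp
// Worked example for find_initial_desc_page_size (illustrative numbers):
//   buffer_type = SCATTER_GATHER, max_desc_page_size = 4096, force_default_page_size = false
//   channel_max_page_size = min(DEFAULT_SG_PAGE_SIZE /*512*/, 4096) = 512
//   max_transfer_size = 100 bytes (e.g. one NMS bbox per transfer)
//   100 < 512 and the default is not forced, so the low-page-size path is taken:
//   initial_desc_page_size = get_nearest_powerof_2(100, MIN_SG_PAGE_SIZE /*64*/) = 128
// Each descriptor page then wastes at most 28 bytes instead of 412.
static_assert(64 < 128 && 128 < 512, "128 is the smallest power of 2 >= 100 that is also >= 64");
```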
diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp
index f975fe17..c2443f05 100644
--- a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp
+++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp
@@ -3,25 +3,20 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file continuous_buffer.hpp
+ * @file continuous_buffer.cpp
  * @brief Continuous physical vdma buffer.
 **/
 
 #include "continuous_buffer.hpp"
 
-/* TODO - Support non default CCB page sizes */
-#define CCB_PAGE_SIZE (512)
-#define MAX_PAGES_PER_INTERRUPT (0x0003FFFF)
-#define MAX_CCB_BUFFER_SIZE (CCB_PAGE_SIZE * MAX_PAGES_PER_INTERRUPT)
-
 namespace hailort {
 namespace vdma {
 
 Expected<ContinuousBuffer> ContinuousBuffer::create(size_t size, HailoRTDriver &driver)
 {
-    if (size > MAX_CCB_BUFFER_SIZE) {
-        LOGGER__INFO("continious memory size {} must be smaller/equal to {}.", size, MAX_CCB_BUFFER_SIZE);
-        return make_unexpected(HAILO_OUT_OF_HOST_CMA_MEMORY);
+    if (size < MIN_CCB_PAGE_SIZE * MIN_CCB_DESCS_COUNT) {
+        LOGGER__ERROR("continuous memory size ({}) must be larger/equal to {}.", size, (MIN_CCB_PAGE_SIZE * MIN_CCB_DESCS_COUNT));
+        return make_unexpected(HAILO_INTERNAL_FAILURE);
     }
 
     auto result = driver.vdma_continuous_buffer_alloc(size);
@@ -56,17 +51,6 @@ uint64_t ContinuousBuffer::dma_address() const
     return m_buffer_info.dma_address;
 }
 
-uint16_t ContinuousBuffer::desc_page_size() const
-{
-    // Currently we support only the default desc page size, TODO: HRT-5381 support more desc page size?
-    return DEFAULT_DESC_PAGE_SIZE;
-}
-
-uint32_t ContinuousBuffer::descs_count() const
-{
-    return descriptors_in_buffer(m_buffer_info.size);
-}
-
 hailo_status ContinuousBuffer::read(void *buf_dst, size_t count, size_t offset)
 {
     CHECK((count + offset) <= m_buffer_info.size, HAILO_INSUFFICIENT_BUFFER,
@@ -87,17 +71,8 @@ hailo_status ContinuousBuffer::write(const void *buf_src, size_t count, size_t o
     return HAILO_SUCCESS;
 }
 
-Expected<uint32_t> ContinuousBuffer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
-    size_t desc_offset)
-{
-    (void)last_desc_interrupts_domain;
-    (void)desc_offset;
-
-    // The descriptors in continuous mode are programmed by the hw, nothing to do here.
-    return descriptors_in_buffer(transfer_size);
-}
-
-ContinuousBuffer::ContinuousBuffer(HailoRTDriver &driver, const ContinousBufferInfo &buffer_info) :
+ContinuousBuffer::ContinuousBuffer(HailoRTDriver &driver,
+    const ContinousBufferInfo &buffer_info) :
     m_driver(driver),
     m_buffer_info(buffer_info)
 {}
diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
index a4c109b2..1ad50ac7 100644
--- a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
+++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
@@ -3,17 +3,22 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file continuous_buffer.hpp
- * @brief Continuous physical vdma buffer.
+ * @file continuous_edge_layer.hpp
+ * @brief Continuous physical vdma edge layer.
 **/
 
 #ifndef _HAILO_VDMA_CONTINUOUS_BUFFER_HPP_
 #define _HAILO_VDMA_CONTINUOUS_BUFFER_HPP_
 
-#include "os/hailort_driver.hpp"
+#include "vdma/driver/hailort_driver.hpp"
 #include "os/mmap_buffer.hpp"
 #include "vdma/memory/vdma_buffer.hpp"
 
+#define MAX_CCB_DESCS_COUNT (0x00040000)
+#define MIN_CCB_DESCS_COUNT (16u)
+#define MAX_CCB_PAGE_SIZE (4096)
+#define MIN_CCB_PAGE_SIZE (512)
+#define DEFAULT_CCB_PAGE_SIZE (512)
 
 namespace hailort {
 namespace vdma {
@@ -41,16 +46,10 @@ class ContinuousBuffer final : public VdmaBuffer {
     }
 
     virtual size_t size() const override;
-    virtual uint64_t dma_address() const override;
-    virtual uint16_t desc_page_size() const override;
-    virtual uint32_t descs_count() const override;
-
     virtual hailo_status read(void *buf_dst, size_t count, size_t offset) override;
     virtual hailo_status write(const void *buf_src, size_t count, size_t offset) override;
 
-    virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
-        size_t desc_offset) override;
-
+    uint64_t dma_address() const;
 private:
     ContinuousBuffer(HailoRTDriver &driver, const ContinousBufferInfo &buffer_info);
diff --git a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp
new file mode 100644
index 00000000..d3feb008
--- /dev/null
+++ b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp
@@ -0,0 +1,70 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file continuous_edge_layer.cpp
+ * @brief Continuous physical vdma edge layer.
+ **/
+
+#include "continuous_edge_layer.hpp"
+
+namespace hailort {
+namespace vdma {
+
+Expected<ContinuousEdgeLayer> ContinuousEdgeLayer::create(std::shared_ptr<VdmaBuffer> &&buffer, size_t size, size_t offset,
+    uint16_t page_size, uint32_t num_pages)
+{
+    if (num_pages > MAX_CCB_DESCS_COUNT) {
+        LOGGER__INFO("continuous memory number of pages {} must be smaller/equal to {}.", num_pages, MAX_CCB_DESCS_COUNT);
+        return make_unexpected(HAILO_INTERNAL_FAILURE);
+    }
+
+    if (page_size > MAX_CCB_PAGE_SIZE) {
+        LOGGER__INFO("continuous memory page size {} must be smaller/equal to {}.", page_size, MAX_CCB_PAGE_SIZE);
+        return make_unexpected(HAILO_INTERNAL_FAILURE);
+    }
+
+    if (buffer->size() < offset + size) {
+        LOGGER__ERROR("Edge layer is not fully inside the connected buffer. buffer size is {} while edge layer offset {} and size {}",
+            buffer->size(), offset, size);
+        return make_unexpected(HAILO_INTERNAL_FAILURE);
+    }
+
+    return ContinuousEdgeLayer(std::move(buffer), size, offset, page_size, num_pages);
+}
+
+uint64_t ContinuousEdgeLayer::dma_address() const
+{
+    return (std::dynamic_pointer_cast<ContinuousBuffer>(m_buffer))->dma_address() + m_offset;
+}
+
+uint16_t ContinuousEdgeLayer::desc_page_size() const
+{
+    return m_page_size;
+}
+
+uint32_t ContinuousEdgeLayer::descs_count() const
+{
+    return m_num_pages;
+}
+
+Expected<uint32_t> ContinuousEdgeLayer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
+    size_t desc_offset)
+{
+    (void)last_desc_interrupts_domain;
+    (void)desc_offset;
+
+    // The descriptors in continuous mode are programmed by the hw, nothing to do here.
+    return descriptors_in_buffer(transfer_size);
+}
+
+ContinuousEdgeLayer::ContinuousEdgeLayer(std::shared_ptr<VdmaBuffer> &&buffer, size_t size, size_t offset,
+    uint16_t page_size, uint32_t num_pages) :
+    VdmaEdgeLayer(std::move(buffer), size, offset),
+    m_page_size(page_size),
+    m_num_pages(num_pages)
+{}
+
+}; /* namespace vdma */
+}; /* namespace hailort */
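The new split has ContinuousBuffer owning the CMA allocation while ContinuousEdgeLayer is a sized, offset view into it, so several edge layers can share one allocation. A hedged usage sketch; the exact `shared_ptr` parameter type and error handling are assumptions based on the signatures above:

```cpp
// Sketch only: carve two 32 KB views out of one 64 KB continuous allocation.
// Assumes `buffer` is a std::shared_ptr<vdma::VdmaBuffer> holding a ContinuousBuffer
// obtained via ContinuousBuffer::create(); error handling elided.
auto layer_a = vdma::ContinuousEdgeLayer::create(
    std::shared_ptr<vdma::VdmaBuffer>(buffer), /*size=*/32 * 1024, /*offset=*/0,
    /*page_size=*/512, /*num_pages=*/64);

auto layer_b = vdma::ContinuousEdgeLayer::create(
    std::shared_ptr<vdma::VdmaBuffer>(buffer), /*size=*/32 * 1024, /*offset=*/32 * 1024,
    /*page_size=*/512, /*num_pages=*/64);
// Each call validates offset + size against buffer->size(), per the check above.
```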
diff --git a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp
new file mode 100644
index 00000000..515c4f6e
--- /dev/null
+++ b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp
@@ -0,0 +1,57 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file continuous_edge_layer.hpp
+ * @brief Continuous physical vdma edge layer.
+ **/
+
+#ifndef _HAILO_VDMA_CONTINUOUS_EDGE_LAYER_HPP_
+#define _HAILO_VDMA_CONTINUOUS_EDGE_LAYER_HPP_
+
+#include "vdma/driver/hailort_driver.hpp"
+#include "os/mmap_buffer.hpp"
+#include "vdma/memory/vdma_edge_layer.hpp"
+#include "vdma/memory/continuous_buffer.hpp"
+
+
+namespace hailort {
+namespace vdma {
+
+class ContinuousEdgeLayer final : public VdmaEdgeLayer {
+public:
+    static Expected<ContinuousEdgeLayer> create(std::shared_ptr<VdmaBuffer> &&buffer, size_t size, size_t offset,
+        uint16_t page_size, uint32_t num_pages);
+
+    virtual ~ContinuousEdgeLayer() = default;
+
+    ContinuousEdgeLayer(const ContinuousEdgeLayer &) = delete;
+    ContinuousEdgeLayer(ContinuousEdgeLayer &&) = default;
+    ContinuousEdgeLayer& operator=(const ContinuousEdgeLayer &) = delete;
+    ContinuousEdgeLayer& operator=(ContinuousEdgeLayer &&) = delete;
+
+    virtual Type type() const override
+    {
+        return Type::CONTINUOUS;
+    }
+
+    virtual uint64_t dma_address() const override;
+    virtual uint16_t desc_page_size() const override;
+    virtual uint32_t descs_count() const override;
+
+    virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
+        size_t desc_offset) override;
+
+private:
+    ContinuousEdgeLayer(std::shared_ptr<VdmaBuffer> &&buffer, size_t size, size_t offset,
+        uint16_t page_size, uint32_t num_pages);
+
+    const uint16_t m_page_size;
+    const uint32_t m_num_pages;
+};
+
+}; /* namespace vdma */
+}; /* namespace hailort */
+
+#endif /* _HAILO_VDMA_CONTINUOUS_EDGE_LAYER_HPP_ */
diff --git a/hailort/libhailort/src/vdma/memory/descriptor_list.cpp b/hailort/libhailort/src/vdma/memory/descriptor_list.cpp
index baf39dd3..2452cb4f 100644
--- a/hailort/libhailort/src/vdma/memory/descriptor_list.cpp
+++ b/hailort/libhailort/src/vdma/memory/descriptor_list.cpp
@@ -37,8 +37,8 @@ Expected<DescriptorList> DescriptorList::create(uint32_t desc_count, uint16_t de
     hailo_status status = HAILO_UNINITIALIZED;
     assert(desc_page_size <= driver.desc_max_page_size());
 
-    CHECK_AS_EXPECTED(desc_count <= MAX_DESCS_COUNT, HAILO_INVALID_ARGUMENT,
-        "descs_count {} must be smaller/equal to {}", desc_count, MAX_DESCS_COUNT);
+    CHECK_AS_EXPECTED(desc_count <= MAX_SG_DESCS_COUNT, HAILO_INVALID_ARGUMENT,
+        "descs_count {} must be smaller/equal to {}", desc_count, MAX_SG_DESCS_COUNT);
 
     DescriptorList object(desc_count, desc_page_size, is_circular, driver, status);
     if (HAILO_SUCCESS != status) {
@@ -63,7 +63,7 @@ DescriptorList::DescriptorList(uint32_t desc_count, uint16_t desc_page_size, boo
         return;
     }
 
-    auto desc_list_info = m_driver.descriptors_list_create(desc_count, m_is_circular);
+    auto desc_list_info = m_driver.descriptors_list_create(desc_count, m_desc_page_size, m_is_circular);
     if (!desc_list_info) {
         status = desc_list_info.status();
         return;
     }
@@ -96,15 +96,16 @@ DescriptorList::DescriptorList(DescriptorList &&other) noexcept :
     m_desc_list_info.user_address = std::exchange(other.m_desc_list_info.user_address, nullptr);
 }
 
-hailo_status DescriptorList::configure_to_use_buffer(MappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc)
+hailo_status DescriptorList::configure_to_use_buffer(MappedBuffer& buffer, size_t buffer_size,
+    size_t buffer_offset, ChannelId channel_id, uint32_t starting_desc)
 {
     const auto desc_list_capacity = m_desc_page_size * count();
-    CHECK(buffer.size() <= desc_list_capacity, HAILO_INVALID_ARGUMENT,
+    CHECK(buffer_size <= desc_list_capacity, HAILO_INVALID_ARGUMENT,
         "Can't bind a buffer larger than the descriptor list's capacity. Buffer size {}, descriptor list capacity {}",
-        buffer.size(), desc_list_capacity);
+        buffer_size, desc_list_capacity);
 
-    return m_driver.descriptors_list_bind_vdma_buffer(m_desc_list_info.handle, buffer.handle(), m_desc_page_size,
-        channel_id.channel_index, starting_desc);
+    return m_driver.descriptors_list_bind_vdma_buffer(m_desc_list_info.handle, buffer.handle(), buffer_size,
+        buffer_offset, channel_id.channel_index, starting_desc);
 }
 
 Expected<uint16_t> DescriptorList::program_last_descriptor(size_t transfer_size,
@@ -123,7 +124,7 @@ Expected<uint16_t> DescriptorList::program_last_descriptor(size_t transfer_size,
     auto resuide = transfer_size - (required_descriptors - 1) * m_desc_page_size;
     assert(IS_FIT_IN_UINT16(resuide));
     size_t last_desc = (desc_offset + required_descriptors - 1) % count();
-    program_single_descriptor((*this)[last_desc], static_cast<uint16_t>(resuide), last_desc_interrupts_domain);
+    program_single_descriptor(last_desc, static_cast<uint16_t>(resuide), last_desc_interrupts_domain);
 
     return std::move(static_cast<uint16_t>(required_descriptors));
 }
@@ -145,9 +146,9 @@ uint32_t DescriptorList::calculate_descriptors_count(uint32_t buffer_size, uint1
     // of descs given (Otherwise we won't be able to determine if the buffer is empty or full).
     // Therefore we add 1 in order to compensate.
     uint32_t descs_count = std::min(((descriptors_in_buffer(buffer_size, desc_page_size) * batch_size) + 1),
-        MAX_DESCS_COUNT);
+        MAX_SG_DESCS_COUNT);
 
-    return get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
+    return get_nearest_powerof_2(descs_count, MIN_SG_DESCS_COUNT);
 }
 
 uint32_t DescriptorList::get_interrupts_bitmask(InterruptsDomain interrupts_domain)
@@ -179,9 +180,11 @@ uint32_t DescriptorList::get_interrupts_bitmask(InterruptsDomain interrupts_doma
     return bitmask;
 }
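A worked example of the residue computation in program_last_descriptor above (illustrative numbers):

```cpp
// transfer_size = 1000, m_desc_page_size = 512:
//   required_descriptors = DIV_ROUND_UP(1000, 512) = 2
//   residue = 1000 - (2 - 1) * 512 = 488   // only the last descriptor is partial
// The last descriptor is programmed with size 488 and the requested interrupt
// domain; all preceding descriptors keep their default full-page size.
static_assert(1000 - (2 - 1) * 512 == 488, "last-descriptor residue example");
```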
@@ -179,9 +180,11 @@ uint32_t DescriptorList::get_interrupts_bitmask(InterruptsDomain interrupts_doma return bitmask; } -void DescriptorList::program_single_descriptor(VdmaDescriptor &descriptor, uint16_t page_size, +void DescriptorList::program_single_descriptor(size_t desc_index, uint16_t page_size, InterruptsDomain interrupts_domain) { + auto &descriptor = (*this)[desc_index]; + // Update the descriptor's PAGE_SIZE field in the control register with the maximum size of the DMA page. // Make all edits to the local variable local_pagesize_desc_ctrl that is on the stack to save reads/writes to DDR auto local_pagesize_desc_ctrl = static_cast<uint32_t>(page_size << DESC_PAGE_SIZE_SHIFT) & DESC_PAGE_SIZE_MASK; @@ -203,11 +206,5 @@ void DescriptorList::program_single_descriptor(VdmaDescriptor &descriptor, uint1 #endif } -void DescriptorList::clear_descriptor(const size_t desc_index) -{ - // Clear previous descriptor properties - program_single_descriptor((*this)[desc_index], m_desc_page_size, InterruptsDomain::NONE); -} - } /* namespace vdma */ } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/descriptor_list.hpp b/hailort/libhailort/src/vdma/memory/descriptor_list.hpp index de3715ef..7f8222a4 100644 --- a/hailort/libhailort/src/vdma/memory/descriptor_list.hpp +++ b/hailort/libhailort/src/vdma/memory/descriptor_list.hpp @@ -17,8 +17,8 @@ #include "vdma/channel/channel_id.hpp" #include "vdma/memory/mapped_buffer.hpp" +#include "vdma/driver/hailort_driver.hpp" -#include "os/hailort_driver.hpp" #include "os/mmap_buffer.hpp" @@ -26,14 +26,14 @@ namespace hailort { namespace vdma { -#define MAX_DESCS_COUNT (64 * 1024u) -#define MIN_DESCS_COUNT (2u) +#define MAX_SG_DESCS_COUNT (64 * 1024u) +#define MIN_SG_DESCS_COUNT (2u) #define DEFAULT_DESC_COUNT (64 * 1024u) -static_assert(is_powerof2(MAX_DESCS_COUNT), "MAX_DESCS_COUNT must be a power of 2"); -static_assert(is_powerof2(MIN_DESCS_COUNT), "MIN_DESCS_COUNT must be a power of 2"); +static_assert(is_powerof2(MAX_SG_DESCS_COUNT), "MAX_SG_DESCS_COUNT must be a power of 2"); +static_assert(is_powerof2(MIN_SG_DESCS_COUNT), "MIN_SG_DESCS_COUNT must be a power of 2"); static_assert(is_powerof2(DEFAULT_DESC_COUNT), "DEFAULT_DESC_COUNT must be a power of 2"); -static_assert(DEFAULT_DESC_COUNT <= MAX_DESCS_COUNT && DEFAULT_DESC_COUNT >= MIN_DESCS_COUNT, +static_assert(DEFAULT_DESC_COUNT <= MAX_SG_DESCS_COUNT && DEFAULT_DESC_COUNT >= MIN_SG_DESCS_COUNT, "DEFAULT_DESC_COUNT not in range"); // From PLDA's vDMA controller reference: @@ -42,16 +42,16 @@ static_assert(DEFAULT_DESC_COUNT <= MAX_DESCS_COUNT && DEFAULT_DESC_COUNT >= MIN // - G_PAGE_SIZE_MAX dictates the maximum desc page size: // max_page_size = 2 ^ (G_PAGE_SIZE_MAX - 1) // In our case max_page_size = 2 ^ (13 - 1) = 4096 -static constexpr uint16_t MIN_DESC_PAGE_SIZE = 64; -static constexpr uint16_t MAX_DESC_PAGE_SIZE = 4096; -static constexpr uint16_t DEFAULT_DESC_PAGE_SIZE = 512; +static constexpr uint16_t MIN_SG_PAGE_SIZE = 64; +static constexpr uint16_t MAX_SG_PAGE_SIZE = 4096; +static constexpr uint16_t DEFAULT_SG_PAGE_SIZE = 512; -static_assert(is_powerof2(MIN_DESC_PAGE_SIZE), "MIN_DESC_PAGE_SIZE must be a power of 2"); -static_assert(MIN_DESC_PAGE_SIZE > 0, "MIN_DESC_PAGE_SIZE must be larger then 0"); -static_assert(is_powerof2(MAX_DESC_PAGE_SIZE), "MAX_DESC_PAGE_SIZE must be a power of 2"); -static_assert(MAX_DESC_PAGE_SIZE > 0, "MAX_DESC_PAGE_SIZE must be larger then 0"); -static_assert(is_powerof2(DEFAULT_DESC_PAGE_SIZE), "DEFAULT_DESC_PAGE_SIZE must be a power of 2"); -static_assert(DEFAULT_DESC_PAGE_SIZE > 0, "DEFAULT_DESC_PAGE_SIZE must be larger then 0"); +static_assert(is_powerof2(MIN_SG_PAGE_SIZE), "MIN_SG_PAGE_SIZE must be a power of 2"); +static_assert(MIN_SG_PAGE_SIZE > 0, "MIN_SG_PAGE_SIZE must be larger than 0"); +static_assert(is_powerof2(MAX_SG_PAGE_SIZE), "MAX_SG_PAGE_SIZE must be a power of 2"); +static_assert(MAX_SG_PAGE_SIZE > 0, "MAX_SG_PAGE_SIZE must be larger than 0"); +static_assert(is_powerof2(DEFAULT_SG_PAGE_SIZE), "DEFAULT_SG_PAGE_SIZE must be a power of 2");
must be a power of 2"); +static_assert(DEFAULT_SG_PAGE_SIZE > 0, "DEFAULT_SG_PAGE_SIZE must be larger then 0"); static constexpr auto DESCRIPTOR_STATUS_MASK = 0xFF; @@ -87,25 +87,6 @@ struct VdmaDescriptor static_assert(SIZE_OF_SINGLE_DESCRIPTOR == sizeof(VdmaDescriptor), "Invalid size of descriptor"); -enum class InterruptsDomain -{ - NONE = 0, - DEVICE = 1 << 0, - HOST = 1 << 1, - BOTH = DEVICE | HOST -}; - -inline InterruptsDomain operator|(InterruptsDomain a, InterruptsDomain b) -{ - return static_cast(static_cast(a) | static_cast(b)); -} - -inline InterruptsDomain& operator|=(InterruptsDomain &a, InterruptsDomain b) -{ - a = a | b; - return a; -} - inline bool host_interuptes_enabled(InterruptsDomain interrupts_domain) { return 0 != (static_cast(interrupts_domain) & static_cast(InterruptsDomain::HOST)); @@ -164,14 +145,14 @@ class DescriptorList // Map descriptors starting at offset to the start of buffer, wrapping around the descriptor list as needed // On hailo8, we allow configuring buffer without specific channel index (default is INVALID_VDMA_CHANNEL_INDEX). - hailo_status configure_to_use_buffer(MappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc = 0); + hailo_status configure_to_use_buffer(MappedBuffer& buffer, size_t buffer_size, size_t buffer_offset, + ChannelId channel_id, uint32_t starting_desc = 0); // All descritors are initialized to have size of m_desc_page_size - so all we do is set the last descritor for the // Interrupt - and then after transfer has finished clear the previously used first and last decsriptors. // This saves us write/ reads to the desscriptor list which is DMA memory. Expected program_last_descriptor(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, size_t desc_offset); - void program_single_descriptor(VdmaDescriptor &descriptor, uint16_t page_size, InterruptsDomain interrupts_domain); - void clear_descriptor(const size_t desc_index); + void program_single_descriptor(size_t desc_index, uint16_t page_size, InterruptsDomain interrupts_domain); uint32_t descriptors_in_buffer(size_t buffer_size) const; static uint32_t descriptors_in_buffer(size_t buffer_size, uint16_t desc_page_size); diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp index a03b2e04..e76860f5 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp @@ -24,8 +24,6 @@ namespace hailort { namespace vdma { -#if defined(__linux__) || defined(_MSC_VER) - // User buffer. This class does not own the buffer. 
diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp index a03b2e04..e76860f5 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp @@ -24,8 +24,6 @@ namespace hailort { namespace vdma { -#if defined(__linux__) || defined(_MSC_VER) - // User buffer. This class does not own the buffer. class UserAllocatedDmaAbleBuffer : public DmaAbleBuffer { public: @@ -52,13 +50,14 @@ class UserAllocatedDmaAbleBuffer : public DmaAbleBuffer { virtual size_t size() const override { return m_size; } virtual void *user_address() override { return m_user_address; } - virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER; } private: const size_t m_size; void *m_user_address; }; +#if defined(__linux__) || defined(_MSC_VER) #if defined(__linux__) class PageAlignedDmaAbleBuffer : public DmaAbleBuffer { @@ -80,7 +79,7 @@ class PageAlignedDmaAbleBuffer : public DmaAbleBuffer { virtual void* user_address() override { return m_mmapped_buffer.address(); } virtual size_t size() const override { return m_mmapped_buffer.size(); } - virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER; } private: // Using mmap instead of aligned_alloc to enable MEM_SHARE flag - used for multi-process fork. @@ -106,7 +105,7 @@ class PageAlignedDmaAbleBuffer : public DmaAbleBuffer { virtual size_t size() const override { return m_memory_guard.size(); } virtual void *user_address() override { return m_memory_guard.address(); } - virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER; } private: VirtualAllocGuard m_memory_guard; @@ -252,11 +251,9 @@ class SharedMemoryDmaAbleBuffer : public DmaAbleBuffer { MmapBuffer m_mmapped_buffer; }; -Expected<DmaAbleBufferPtr> DmaAbleBuffer::create_from_user_address(void */* user_address */, size_t /* size */) +Expected<DmaAbleBufferPtr> DmaAbleBuffer::create_from_user_address(void *user_address, size_t size) { - LOGGER__ERROR("Mapping user address is not supported on QNX"); - - return make_unexpected(HAILO_NOT_SUPPORTED); + return UserAllocatedDmaAbleBuffer::create(user_address, size); } Expected<DmaAbleBufferPtr> DmaAbleBuffer::create_by_allocation(size_t size) diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp index 0123e62b..79f56aa6 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp @@ -19,7 +19,7 @@ #define _HAILO_DMA_ABLE_BUFFER_HPP_ #include "hailo/expected.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "os/mmap_buffer.hpp" namespace hailort {
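The page-aligned variant above uses mmap rather than aligned_alloc so the pages can be shared across fork(). A minimal Linux-only sketch of that idea (not HailoRT code; sizes are illustrative):

    #include <cstddef>
    #include <cstdio>
    #include <sys/mman.h>
    #include <unistd.h>

    int main() {
        const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
        const size_t size = ((100 * 1024 + page - 1) / page) * page; // round up to whole pages
        // MAP_SHARED | MAP_ANONYMOUS keeps the pages visible to forked children,
        // which aligned_alloc cannot provide.
        void *mem = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (mem == MAP_FAILED) { perror("mmap"); return 1; }
        // The buffer is page aligned, so a driver can map it for DMA as-is.
        munmap(mem, size);
        return 0;
    }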
diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp index 884c15e4..1c2e8fae 100644 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp @@ -18,11 +18,11 @@ * so we need to allocate the pages in driver. **/ -#ifndef _HAILO_DMA_MAPPED_BUFFER_HPP_ -#define _HAILO_DMA_MAPPED_BUFFER_HPP_ +#ifndef _HAILO_VDMA_MAPPED_BUFFER_HPP_ +#define _HAILO_VDMA_MAPPED_BUFFER_HPP_ #include "hailo/expected.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "vdma/memory/dma_able_buffer.hpp" #include <memory> @@ -98,4 +98,4 @@ class MappedBuffer final } /* namespace vdma */ } /* namespace hailort */ -#endif /* _HAILO_DMA_MAPPED_BUFFER_HPP_ */ \ No newline at end of file +#endif /* _HAILO_VDMA_MAPPED_BUFFER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/mapping_manager.cpp b/hailort/libhailort/src/vdma/memory/mapping_manager.cpp deleted file mode 100644 index ba2a21d0..00000000 --- a/hailort/libhailort/src/vdma/memory/mapping_manager.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file mapping_manager.cpp - * @brief DMA mapping registry on a given device - **/ - -#include "mapping_manager.hpp" -#include "hailo/hailort.h" - -namespace hailort { -namespace vdma { - -MappingManager::MappingManager(HailoRTDriver &driver) : - m_driver(driver), - m_mutex(), - m_h2d_mappings(), - m_d2h_mappings() -{} - -hailo_status MappingManager::map_buffer(void *address, size_t size, hailo_stream_direction_t direction) -{ - static const auto CREATE_DMAABLE_BUFFER = nullptr; - auto mapping_result = try_dma_map(CREATE_DMAABLE_BUFFER, address, size, direction); - CHECK_EXPECTED_AS_STATUS(mapping_result); - - const auto new_mapping = mapping_result->second; - return new_mapping ? HAILO_SUCCESS : HAILO_DMA_MAPPING_ALREADY_EXISTS; -} - -hailo_status MappingManager::unmap_buffer(void *address, hailo_stream_direction_t direction) -{ - auto &mappings = get_mapping_storage(direction); - std::lock_guard<std::mutex> lock_guard(m_mutex); - auto it = mappings.find(address); - if (it == mappings.end()) { - LOGGER__TRACE("Buffer {} not mapped in direction {}", address, direction); - return HAILO_NOT_FOUND; - } - - mappings.erase(it); - return HAILO_SUCCESS; -} - -Expected<std::pair<MappedBufferPtr, bool>> MappingManager::try_dma_map(DmaAbleBufferPtr buffer, - hailo_stream_direction_t direction) -{ - CHECK_ARG_NOT_NULL_AS_EXPECTED(buffer); - - return try_dma_map(buffer, buffer->user_address(), buffer->size(), direction); -} - -Expected<std::pair<MappedBufferPtr, bool>> MappingManager::try_dma_map(DmaAbleBufferPtr buffer, - void *address, size_t size, hailo_stream_direction_t direction) -{ - assert((nullptr == buffer) || ((buffer->user_address() == address) && (buffer->size() == size))); - CHECK_ARG_NOT_NULL_AS_EXPECTED(address); - CHECK_AS_EXPECTED(0 < size, HAILO_INVALID_ARGUMENT); - CHECK_AS_EXPECTED(HAILO_STREAM_DIRECTION_MAX_ENUM > direction, HAILO_INVALID_ARGUMENT); - - auto &mappings = get_mapping_storage(direction); - std::lock_guard<std::mutex> lock_guard(m_mutex); - if (mappings.end() != mappings.find(address)) { - // Mapping exists - return std::make_pair(mappings[address], false); - } - - // New mapping - if (nullptr == buffer) { - // We only want to create a dma-able buffer if the address hasn't been mapped and we haven't gotten - // a dma-able buffer from the user - auto buffer_exp = DmaAbleBuffer::create_from_user_address(address, size); - CHECK_EXPECTED(buffer_exp); - buffer = buffer_exp.release(); - } - - const auto data_direction = (direction == HAILO_H2D_STREAM) ?
- HailoRTDriver::DmaDirection::H2D : - HailoRTDriver::DmaDirection::D2H; - auto mapped_buffer = MappedBuffer::create_shared(buffer, m_driver, data_direction); - CHECK_EXPECTED(mapped_buffer); - - mappings[address] = mapped_buffer.release(); - - return std::make_pair(mappings[address], true); -} - -std::unordered_map<void *, MappedBufferPtr> &MappingManager::get_mapping_storage(hailo_stream_direction_t direction) -{ - // No point in failing if direction is invalid (i.e. HAILO_STREAM_DIRECTION_MAX_ENUM), - // because the direction is checked before mappings are added (see try_dma_map). So an invalid direction - // will result in the mapping not being found - return (direction == HAILO_H2D_STREAM) ? m_h2d_mappings : m_d2h_mappings; -} - -} /* namespace vdma */ -} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/mapping_manager.hpp b/hailort/libhailort/src/vdma/memory/mapping_manager.hpp deleted file mode 100644 index a211f9df..00000000 --- a/hailort/libhailort/src/vdma/memory/mapping_manager.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file mapping_manager.hpp - * @brief DMA mapping registry on a given device - **/ - -#ifndef _HAILO_MAPPING_MANAGER_HPP_ -#define _HAILO_MAPPING_MANAGER_HPP_ - -#include "hailo/hailort.h" -#include "vdma/memory/mapped_buffer.hpp" -#include "os/hailort_driver.hpp" - -#include <mutex> -#include <unordered_map> -#include <utility> - -namespace hailort { -namespace vdma { - -class MappingManager final -{ -public: - MappingManager(HailoRTDriver &driver); - MappingManager(MappingManager &&) = delete; - MappingManager(const MappingManager &) = delete; - MappingManager &operator=(MappingManager &&) = delete; - MappingManager &operator=(const MappingManager &) = delete; - ~MappingManager() = default; - - hailo_status map_buffer(void *address, size_t size, hailo_stream_direction_t direction); - hailo_status unmap_buffer(void *address, hailo_stream_direction_t direction); - // Returns (MappedBufferPtr, true) if the mapping is new - // Returns (MappedBufferPtr, false) if the mapping is pre-existing - Expected<std::pair<MappedBufferPtr, bool>> try_dma_map(DmaAbleBufferPtr buffer, hailo_stream_direction_t direction); - -private: - inline std::unordered_map<void *, MappedBufferPtr> &get_mapping_storage(hailo_stream_direction_t direction); - Expected<std::pair<MappedBufferPtr, bool>> try_dma_map(DmaAbleBufferPtr buffer, void *address, size_t size, - hailo_stream_direction_t direction); - - HailoRTDriver &m_driver; - std::mutex m_mutex; - std::unordered_map<void *, MappedBufferPtr> m_h2d_mappings; - std::unordered_map<void *, MappedBufferPtr> m_d2h_mappings; -}; - -} /* namespace vdma */ -} /* namespace hailort */ - -#endif /* _HAILO_mapping_manager_HPP_ */ diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp index 5ef0132b..50fe63e6 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp @@ -3,45 +3,26 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file vdma_sg_buffer.cpp + * @file sg_buffer.cpp * @brief Scatter-gather vdma buffer.
**/ #include "vdma/memory/sg_buffer.hpp" -#include "vdma/channel/channel_id.hpp" namespace hailort { namespace vdma { -Expected<SgBuffer> SgBuffer::create(HailoRTDriver &driver, size_t size, uint32_t desc_count, uint16_t desc_page_size, - bool is_circular, HailoRTDriver::DmaDirection data_direction, ChannelId channel_id) +Expected<SgBuffer> SgBuffer::create(HailoRTDriver &driver, size_t size, HailoRTDriver::DmaDirection data_direction) { - CHECK_AS_EXPECTED(size <= (desc_count * desc_page_size), HAILO_INTERNAL_FAILURE, - "Requested buffer size {} must be smaller than {}", size, (desc_count * desc_page_size)); - CHECK_AS_EXPECTED((size % desc_page_size) == 0, HAILO_INTERNAL_FAILURE, - "SgBuffer size must be a multiple of descriptors page size (size {})", size); - auto mapped_buffer = MappedBuffer::create_shared_by_allocation(size, driver, data_direction); CHECK_EXPECTED(mapped_buffer); - auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, is_circular, driver); - CHECK_EXPECTED(desc_list_exp); - - auto desc_list = make_shared_nothrow<DescriptorList>(desc_list_exp.release()); - CHECK_NOT_NULL_AS_EXPECTED(desc_list, HAILO_OUT_OF_HOST_MEMORY); - - assert((desc_count * desc_page_size) <= std::numeric_limits<uint32_t>::max()); - - auto status = desc_list->configure_to_use_buffer(*mapped_buffer.value(), channel_id); - CHECK_SUCCESS_AS_EXPECTED(status); - - return SgBuffer(mapped_buffer.release(), desc_list); + return SgBuffer(mapped_buffer.release()); } -SgBuffer::SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer, std::shared_ptr<DescriptorList> desc_list) : - m_mapped_buffer(mapped_buffer), - m_desc_list(desc_list) +SgBuffer::SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer) : - wait - m_mapped_buffer(mapped_buffer) {} size_t SgBuffer::size() const @@ -49,21 +30,6 @@ size_t SgBuffer::size() const return m_mapped_buffer->size(); } -uint64_t SgBuffer::dma_address() const -{ - return m_desc_list->dma_address(); -} - -uint16_t SgBuffer::desc_page_size() const -{ - return m_desc_list->desc_page_size(); -} - -uint32_t SgBuffer::descs_count() const -{ - return static_cast<uint32_t>(m_desc_list->count()); -} - hailo_status SgBuffer::read(void *buf_dst, size_t count, size_t offset) { return m_mapped_buffer->read(buf_dst, count, offset); } @@ -73,10 +39,9 @@ hailo_status SgBuffer::write(const void *buf_src, size_t count, size_t offset) { return m_mapped_buffer->write(buf_src, count, offset); } -Expected<uint32_t> SgBuffer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset) +std::shared_ptr<MappedBuffer> SgBuffer::get_mapped_buffer() { - return m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, desc_offset); + return m_mapped_buffer; } }
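The net effect of this split is that SgBuffer now only owns mapped memory, while every descriptor-list concern moves to SgEdgeLayer, which holds a shared pointer to the buffer plus an offset window into it. A standalone toy model of that ownership shape (not HailoRT code; all names here are illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <memory>
    #include <vector>

    // The "buffer" only owns memory, like the slimmed-down SgBuffer.
    struct ToyBuffer {
        std::vector<uint8_t> data;
        explicit ToyBuffer(size_t size) : data(size) {}
    };

    // The "edge layer" is a sized window at an offset into a shared buffer.
    class ToyEdgeLayer {
    public:
        ToyEdgeLayer(std::shared_ptr<ToyBuffer> buffer, size_t size, size_t offset)
            : m_buffer(std::move(buffer)), m_size(size), m_offset(offset) {
            assert(m_offset + m_size <= m_buffer->data.size()); // window fully inside buffer
        }
        void write(const void *src, size_t count, size_t offset) {
            assert(offset + count <= m_size);
            std::memcpy(m_buffer->data.data() + m_offset + offset, src, count);
        }
    private:
        std::shared_ptr<ToyBuffer> m_buffer;
        size_t m_size, m_offset;
    };

    int main() {
        auto backing = std::make_shared<ToyBuffer>(1024);
        ToyEdgeLayer layer(backing, 512, 256); // a 512-byte window at offset 256
        const char msg[] = "hi";
        layer.write(msg, sizeof(msg), 0);      // lands at byte 256 of the backing buffer
        assert(backing->data[256] == 'h');
        return 0;
    }

Several edge layers can share one backing buffer this way, which is what makes the offset parameter in SgEdgeLayer::create below useful.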
diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.hpp b/hailort/libhailort/src/vdma/memory/sg_buffer.hpp index 38c6d45f..e7b2acd8 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.hpp @@ -6,16 +6,12 @@ * @file sg_buffer.hpp * @brief Scatter-gather vdma buffer, from the user-mode point of view the buffer is continuous, * but not from the physical-memory point of view. - * The sg buffer contains 2 parts: - * - MappedBuffer - the actual buffer stores the data. - * - Descriptors list - each descritpor points to a single "dma page" in the MappedBuffer. - * The hw accept the descriptors list address and parses it to get the actual data. **/ #ifndef _HAILO_VDMA_SG_BUFFER_HPP_ #define _HAILO_VDMA_SG_BUFFER_HPP_ -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "vdma/memory/vdma_buffer.hpp" #include "vdma/memory/descriptor_list.hpp" #include "vdma/memory/mapped_buffer.hpp" @@ -26,8 +22,7 @@ namespace vdma { class SgBuffer final : public VdmaBuffer { public: - static Expected<SgBuffer> create(HailoRTDriver &driver, size_t size, uint32_t desc_count, uint16_t desc_page_size, - bool is_circular, HailoRTDriver::DmaDirection data_direction, vdma::ChannelId channel_id); + static Expected<SgBuffer> create(HailoRTDriver &driver, size_t size, HailoRTDriver::DmaDirection data_direction); virtual ~SgBuffer() = default; @@ -42,22 +37,14 @@ class SgBuffer final : public VdmaBuffer { } virtual size_t size() const override; - virtual uint64_t dma_address() const override; - virtual uint16_t desc_page_size() const override; - virtual uint32_t descs_count() const override; - virtual hailo_status read(void *buf_dst, size_t count, size_t offset) override; virtual hailo_status write(const void *buf_src, size_t count, size_t offset) override; - - virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset) override; + std::shared_ptr<MappedBuffer> get_mapped_buffer(); private: - SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer, std::shared_ptr<DescriptorList> desc_list); + SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer); - // Initialization Dependency: The descriptor list points into the mapped buffer so it must be freed before it std::shared_ptr<MappedBuffer> m_mapped_buffer; - std::shared_ptr<DescriptorList> m_desc_list; }; } /* vdma */ diff --git a/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp new file mode 100644 index 00000000..371f52ba --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file sg_edge_layer.cpp + * @brief Scatter-gather vdma edge layer. + **/ + +#include "vdma/memory/sg_edge_layer.hpp" +#include "vdma/channel/channel_id.hpp" + + +namespace hailort { +namespace vdma { + +Expected<SgEdgeLayer> SgEdgeLayer::create(std::shared_ptr<SgBuffer> &&buffer, size_t size, size_t offset, + HailoRTDriver &driver, uint32_t desc_count, uint16_t desc_page_size, bool is_circular, ChannelId channel_id) +{ + CHECK_AS_EXPECTED(size <= (desc_count * desc_page_size), HAILO_INTERNAL_FAILURE, + "Requested buffer size {} must be smaller or equal to {}", size, (desc_count * desc_page_size)); + CHECK_AS_EXPECTED((size % desc_page_size) == 0, HAILO_INTERNAL_FAILURE, + "SgEdgeLayer size must be a multiple of descriptors page size (size {})", size); + CHECK_AS_EXPECTED((offset % desc_page_size) == 0, HAILO_INTERNAL_FAILURE, + "SgEdgeLayer offset must be a multiple of descriptors page size (offset {}. Page size {})", offset, desc_page_size); + + CHECK_AS_EXPECTED(buffer->size() >= (offset + size), HAILO_INTERNAL_FAILURE, + "Edge layer is not fully inside the connected buffer.
buffer size is {} while edge layer offset {} and size {}", + buffer->size(), offset, size); + + auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, is_circular, driver); + CHECK_EXPECTED(desc_list_exp); + + assert((desc_count * desc_page_size) <= std::numeric_limits<uint32_t>::max()); + + auto status = desc_list_exp->configure_to_use_buffer(*(buffer->get_mapped_buffer()), size, offset, channel_id); + CHECK_SUCCESS_AS_EXPECTED(status); + + return SgEdgeLayer(std::move(buffer), desc_list_exp.release(), size, offset); +} + +SgEdgeLayer::SgEdgeLayer(std::shared_ptr<SgBuffer> &&buffer, DescriptorList &&desc_list, + size_t size, size_t offset) : + VdmaEdgeLayer(std::move(buffer), size, offset), + m_desc_list(std::move(desc_list)) +{} + +uint64_t SgEdgeLayer::dma_address() const +{ + return m_desc_list.dma_address(); +} + +uint16_t SgEdgeLayer::desc_page_size() const +{ + return m_desc_list.desc_page_size(); +} + +uint32_t SgEdgeLayer::descs_count() const +{ + return static_cast<uint32_t>(m_desc_list.count()); +} + +Expected<uint32_t> SgEdgeLayer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, + size_t desc_offset) +{ + return m_desc_list.program_last_descriptor(transfer_size, last_desc_interrupts_domain, desc_offset); +} + +} +} \ No newline at end of file
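The alignment rules that create() enforces above have a simple consequence: because both size and offset are multiples of the descriptor page size, an offset always maps to a whole starting descriptor. A standalone check of that arithmetic (not HailoRT code; values are illustrative):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
        const uint16_t page_size = 512;
        const size_t offset = 4096;      // multiple of page_size -> valid
        const size_t size = 512 * 100;   // multiple of page_size -> valid
        assert(offset % page_size == 0 && size % page_size == 0);
        const size_t first_desc = offset / page_size; // descriptor 8 covers the window start
        const size_t desc_count = size / page_size;   // 100 descriptors span the window
        assert(first_desc == 8 && desc_count == 100);
        return 0;
    }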
diff --git a/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp new file mode 100644 index 00000000..bd9716cc --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file sg_edge_layer.hpp + * @brief Scatter-gather vdma buffer, from the user-mode point of view the buffer is continuous, + * but not from the physical-memory point of view. + * The sg buffer contains 2 parts: + * - MappedBuffer - the actual buffer stores the data. + * - Descriptors list - each descriptor points to a single "dma page" in the MappedBuffer. + * The hw accepts the descriptors list address and parses it to get the actual data. + **/ + +#ifndef _HAILO_VDMA_SG_EDGE_LAYER_HPP_ +#define _HAILO_VDMA_SG_EDGE_LAYER_HPP_ + +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" +#include "vdma/memory/sg_buffer.hpp" +#include "vdma/memory/descriptor_list.hpp" +#include "vdma/memory/mapped_buffer.hpp" + + +namespace hailort { +namespace vdma { + +class SgEdgeLayer final : public VdmaEdgeLayer { +public: + static Expected<SgEdgeLayer> create(std::shared_ptr<SgBuffer> &&buffer, size_t size, size_t offset, + HailoRTDriver &driver, uint32_t desc_count, uint16_t desc_page_size, bool is_circular, ChannelId channel_id); + + virtual ~SgEdgeLayer() = default; + + SgEdgeLayer(const SgEdgeLayer &) = delete; + SgEdgeLayer(SgEdgeLayer &&) = default; + SgEdgeLayer& operator=(const SgEdgeLayer &) = delete; + SgEdgeLayer& operator=(SgEdgeLayer &&) = delete; + + virtual Type type() const override + { + return Type::SCATTER_GATHER; + } + + virtual uint64_t dma_address() const override; + virtual uint16_t desc_page_size() const override; + virtual uint32_t descs_count() const override; + + virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, + size_t desc_offset) override; + +private: + SgEdgeLayer(std::shared_ptr<SgBuffer> &&buffer, DescriptorList &&desc_list, + size_t size, size_t offset); + + // Initialization Dependency: The descriptor list points into the mapped buffer so it must be freed before it + std::shared_ptr<SgBuffer> m_buffer; + DescriptorList m_desc_list; +}; + +} /* vdma */ +} /* hailort */ + +#endif /* _HAILO_VDMA_SG_EDGE_LAYER_HPP_ */ diff --git a/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp b/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp index 97a00d90..ccb0e024 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp @@ -13,25 +13,5 @@ namespace hailort { namespace vdma { -CONTROL_PROTOCOL__host_buffer_info_t VdmaBuffer::get_host_buffer_info(uint32_t transfer_size) -{ - return get_host_buffer_info(type(), dma_address(), desc_page_size(), descs_count(), transfer_size); -} - -CONTROL_PROTOCOL__host_buffer_info_t VdmaBuffer::get_host_buffer_info(Type type, uint64_t dma_address, - uint16_t desc_page_size, uint32_t desc_count, uint32_t transfer_size) -{ - CONTROL_PROTOCOL__host_buffer_info_t buffer_info{}; - buffer_info.buffer_type = static_cast<uint8_t>((type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ? - CONTROL_PROTOCOL__HOST_BUFFER_TYPE_EXTERNAL_DESC : - CONTROL_PROTOCOL__HOST_BUFFER_TYPE_CCB); - buffer_info.dma_address = dma_address; - buffer_info.desc_page_size = desc_page_size; - buffer_info.total_desc_count = desc_count; - buffer_info.bytes_in_pattern = transfer_size; - - return buffer_info; -} - } } \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp b/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp index 97e6e75d..763c4c55 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp @@ -3,15 +3,15 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file vdma_buffer.hpp - * @brief Abstract layer representing a vdma buffer (buffer that can be read/written to the device over vdma.) - * The buffer can be either non-continuous with attach descriptors list (SgBuffer) or continuous buffer. + * @file vdma_edge_layer.hpp + * @brief Abstract layer representing a vdma edge layer (buffer that can be read/written to the device over vdma.)
+ * The buffer can be either non-continuous with attached descriptors list (SgEdgeLayer) or continuous buffer. **/ #ifndef _HAILO_VDMA_VDMA_BUFFER_HPP_ #define _HAILO_VDMA_VDMA_BUFFER_HPP_ -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "vdma/memory/descriptor_list.hpp" #include "control_protocol.h" @@ -37,26 +37,8 @@ class VdmaBuffer { virtual Type type() const = 0; virtual size_t size() const = 0; - virtual uint64_t dma_address() const = 0; - virtual uint16_t desc_page_size() const = 0; - virtual uint32_t descs_count() const = 0; - - uint32_t descriptors_in_buffer(size_t buffer_size) const - { - assert(buffer_size < std::numeric_limits<uint32_t>::max()); - const auto page_size = desc_page_size(); - return static_cast<uint32_t>(DIV_ROUND_UP(buffer_size, page_size)); - } - virtual hailo_status read(void *buf_dst, size_t count, size_t offset) = 0; virtual hailo_status write(const void *buf_src, size_t count, size_t offset) = 0; - - virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset) = 0; - - CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(uint32_t transfer_size); - static CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(Type type, uint64_t dma_address, - uint16_t desc_page_size, uint32_t total_desc_count, uint32_t transfer_size); }; } /* vdma */ diff --git a/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp new file mode 100644 index 00000000..65f3425d --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp @@ -0,0 +1,52 @@ +/** + * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file vdma_edge_layer.cpp + * @brief vdma edge layer. + **/ + +#include "vdma_edge_layer.hpp" +#include "control_protocol.h" + +namespace hailort { +namespace vdma { + +VdmaEdgeLayer::VdmaEdgeLayer(std::shared_ptr<VdmaBuffer> &&buffer, const size_t size, const size_t offset) : + m_buffer(std::move(buffer)), + m_size(size), + m_offset(offset) +{} + +CONTROL_PROTOCOL__host_buffer_info_t VdmaEdgeLayer::get_host_buffer_info(uint32_t transfer_size) +{ + return get_host_buffer_info(type(), dma_address(), desc_page_size(), descs_count(), transfer_size); +} + +CONTROL_PROTOCOL__host_buffer_info_t VdmaEdgeLayer::get_host_buffer_info(Type type, uint64_t dma_address, + uint16_t desc_page_size, uint32_t desc_count, uint32_t transfer_size) +{ + CONTROL_PROTOCOL__host_buffer_info_t buffer_info{}; + buffer_info.buffer_type = static_cast<uint8_t>((type == vdma::VdmaEdgeLayer::Type::SCATTER_GATHER) ?
+ CONTROL_PROTOCOL__HOST_BUFFER_TYPE_EXTERNAL_DESC : + CONTROL_PROTOCOL__HOST_BUFFER_TYPE_CCB); + buffer_info.dma_address = dma_address; + buffer_info.desc_page_size = desc_page_size; + buffer_info.total_desc_count = desc_count; + buffer_info.bytes_in_pattern = transfer_size; + + return buffer_info; +} + +hailo_status VdmaEdgeLayer::read(void *buf_dst, size_t count, size_t offset) +{ + return m_buffer->read(buf_dst, count, m_offset + offset); +} +hailo_status VdmaEdgeLayer::write(const void *buf_src, size_t count, size_t offset) +{ + return m_buffer->write(buf_src, count, m_offset + offset); +} + +} +} \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp new file mode 100644 index 00000000..8814f260 --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp @@ -0,0 +1,74 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file vdma_edge_layer.hpp + * @brief Abstract layer representing a vdma edge layer (buffer that can be read/written to the device over vdma.) + * The buffer can be either non-continuous with attached descriptors list (SgEdgeLayer) or continuous buffer. + **/ + +#ifndef _HAILO_VDMA_VDMA_EDGE_LAYER_HPP_ +#define _HAILO_VDMA_VDMA_EDGE_LAYER_HPP_ + +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/descriptor_list.hpp" +#include "control_protocol.h" +#include "vdma/memory/vdma_buffer.hpp" + +namespace hailort { +namespace vdma { + +class VdmaEdgeLayer { +public: + + enum class Type { + SCATTER_GATHER, + CONTINUOUS + }; + + virtual ~VdmaEdgeLayer() = default; + + VdmaEdgeLayer(const VdmaEdgeLayer &) = delete; + VdmaEdgeLayer(VdmaEdgeLayer &&) = default; + VdmaEdgeLayer& operator=(const VdmaEdgeLayer &) = delete; + VdmaEdgeLayer& operator=(VdmaEdgeLayer &&) = delete; + + virtual Type type() const = 0; + virtual uint64_t dma_address() const = 0; + virtual uint16_t desc_page_size() const = 0; + virtual uint32_t descs_count() const = 0; + + size_t size() const + { + return m_size; + } + + uint32_t descriptors_in_buffer(size_t buffer_size) const + { + assert(buffer_size < std::numeric_limits<uint32_t>::max()); + const auto page_size = desc_page_size(); + return static_cast<uint32_t>(DIV_ROUND_UP(buffer_size, page_size)); + } + + hailo_status read(void *buf_dst, size_t count, size_t offset); + hailo_status write(const void *buf_src, size_t count, size_t offset); + + virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, + size_t desc_offset) = 0; + + CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(uint32_t transfer_size); + static CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(Type type, uint64_t dma_address, + uint16_t desc_page_size, uint32_t total_desc_count, uint32_t transfer_size); +protected: + VdmaEdgeLayer(std::shared_ptr<VdmaBuffer> &&buffer, const size_t size, const size_t offset); + + std::shared_ptr<VdmaBuffer> m_buffer; + const size_t m_size; + const size_t m_offset; +}; + +} /* vdma */ +} /* hailort */ + +#endif /* _HAILO_VDMA_VDMA_EDGE_LAYER_HPP_ */ diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp index e8a7075f..b8bb5374 100644 --- a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp +++ b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp @@ -18,7 +18,7 @@ #include "vdma/pcie/pcie_device.hpp" #include "device_common/control.hpp"
-#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "core_op/resource_manager/resource_manager.hpp" #include "vdma/vdma_config_manager.hpp" diff --git a/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp index 90eea950..b19b2e4e 100644 --- a/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp @@ -39,7 +39,7 @@ Expected VdmaConfigActivatedCoreOp::create( VdmaConfigActivatedCoreOp object(core_op_name, network_group_params, dynamic_batch_size, input_streams, output_streams, std::move(resources_manager), active_core_op_holder, std::move(core_op_activated_event), deactivation_time_accumulator, core_op, status); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return make_unexpected(status); } CHECK_SUCCESS_AS_EXPECTED(status); @@ -74,7 +74,7 @@ VdmaConfigActivatedCoreOp::VdmaConfigActivatedCoreOp( // We know core_op is a VdmaConfigCoreOp status = core_op.activate_impl(dynamic_batch_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Core-op activation failed because it was aborted by user"); return; } diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp index 1e363b13..4afe078b 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp @@ -56,6 +56,7 @@ hailo_status VdmaConfigCoreOp::cancel_pending_transfers() hailo_status VdmaConfigCoreOp::activate_impl(uint16_t dynamic_batch_size) { auto status = HAILO_UNINITIALIZED; + auto start_time = std::chrono::steady_clock::now(); if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE != dynamic_batch_size) { CHECK(dynamic_batch_size <= get_smallest_configured_batch_size(get_config_params()), @@ -66,25 +67,20 @@ hailo_status VdmaConfigCoreOp::activate_impl(uint16_t dynamic_batch_size) status = m_resources_manager->enable_state_machine(dynamic_batch_size); CHECK_SUCCESS(status, "Failed to activate state-machine"); - status = m_resources_manager->start_vdma_interrupts_dispatcher(); - CHECK_SUCCESS(status, "Failed to start vdma interrupts"); + CHECK_SUCCESS(activate_host_resources(), "Failed to activate host resources"); - // Low-level streams assume that the vdma channels are enabled (happens in `enable_state_machine`), and that - // the interrupt dispatcher is running (so they can wait for interrupts). 
- status = activate_low_level_streams(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Low level streams activation failed because some were aborted by user"); - return status; - } - CHECK_SUCCESS(status, "Failed to activate low level streams"); - - TRACE(SwitchCoreOpTrace, std::string(m_resources_manager->get_dev_id()), vdevice_core_op_handle()); + //TODO: HRT-13019 - Unite with the calculation in core_op.cpp + const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( + std::chrono::steady_clock::now() - start_time).count(); + TRACE(ActivateCoreOpTrace, std::string(m_resources_manager->get_dev_id()), vdevice_core_op_handle(), elapsed_time_ms); return HAILO_SUCCESS; } hailo_status VdmaConfigCoreOp::deactivate_impl() { + auto start_time = std::chrono::steady_clock::now(); + auto status = deactivate_host_resources(); CHECK_SUCCESS(status); @@ -96,6 +92,11 @@ hailo_status VdmaConfigCoreOp::deactivate_impl() status = cancel_pending_transfers(); CHECK_SUCCESS(status, "Failed to cancel pending transfers"); + //TODO: HRT-13019 - Unite with the calculation in core_op.cpp + const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( + std::chrono::steady_clock::now() - start_time).count(); + TRACE(DeactivateCoreOpTrace, std::string(m_resources_manager->get_dev_id()), vdevice_core_op_handle(), elapsed_time_ms); + return HAILO_SUCCESS; } @@ -120,15 +121,19 @@ hailo_status VdmaConfigCoreOp::shutdown() return status; } -hailo_status VdmaConfigCoreOp::deactivate_host_resources() +hailo_status VdmaConfigCoreOp::activate_host_resources() { - auto status = deactivate_low_level_streams(); - CHECK_SUCCESS(status, "Failed to deactivate low level streams"); - - // After disabling the vdma interrupts, we may still get some interrupts. On HRT-9430 we need to clean them. - status = m_resources_manager->stop_vdma_interrupts_dispatcher(); - CHECK_SUCCESS(status, "Failed to stop vdma interrupts"); + CHECK_SUCCESS(m_resources_manager->start_vdma_transfer_launcher(), "Failed to start vdma transfer launcher"); + CHECK_SUCCESS(m_resources_manager->start_vdma_interrupts_dispatcher(), "Failed to start vdma interrupts"); + CHECK_SUCCESS(activate_low_level_streams(), "Failed to activate low level streams"); + return HAILO_SUCCESS; +} +hailo_status VdmaConfigCoreOp::deactivate_host_resources() +{ + CHECK_SUCCESS(deactivate_low_level_streams(), "Failed to deactivate low level streams"); + CHECK_SUCCESS(m_resources_manager->stop_vdma_interrupts_dispatcher(), "Failed to stop vdma interrupts"); + CHECK_SUCCESS(m_resources_manager->stop_vdma_transfer_launcher(), "Failed to stop vdma transfers pending launch"); return HAILO_SUCCESS; }
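The elapsed-time expressions in the hunks above lost their template arguments in extraction; they are reconstructed here as std::chrono::duration<double, std::milli>, an assumption that matches the .count() usage and the _ms naming. A standalone sketch of the pattern (the sleep stands in for activation work):

    #include <chrono>
    #include <cstdio>
    #include <thread>

    int main() {
        const auto start_time = std::chrono::steady_clock::now();
        std::this_thread::sleep_for(std::chrono::milliseconds(5)); // placeholder workload
        // duration<double, std::milli> keeps fractional milliseconds instead of truncating.
        const auto elapsed_time_ms = std::chrono::duration<double, std::milli>(
            std::chrono::steady_clock::now() - start_time).count();
        printf("took %.3f ms\n", elapsed_time_ms);
        return 0;
    }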
diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp index f923e091..42f60f99 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp @@ -49,8 +49,13 @@ class VdmaConfigCoreOp : public CoreOp // Will first deactivate host resources (via deactivate_host_resources) and then reset the core-op on the fw virtual hailo_status deactivate_impl() override; virtual hailo_status shutdown() override; + + // Activate all resources related to the core-op on the host. + hailo_status activate_host_resources(); + // Deactivate all resources related to the core-op on the host, but without resetting the core-op on the fw hailo_status deactivate_host_resources(); + hailo_status cancel_pending_transfers(); virtual Expected<hailo_stream_interface_t> get_default_streams_interface() override; diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.cpp b/hailort/libhailort/src/vdma/vdma_config_manager.cpp index 73c650f4..753d34f1 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_manager.cpp @@ -8,58 +8,94 @@ **/ #include "vdma_config_manager.hpp" -#include "hailo/hailort.h" +#include "utils/profiler/tracer_macros.hpp" namespace hailort { -hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op, - std::shared_ptr<VdmaConfigCoreOp> next_core_op, const uint16_t batch_size, const bool is_batch_switch) + +hailo_status VdmaConfigManager::set_core_op(const std::string &device_id, std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, const uint16_t batch_size) { - CHECK((nullptr != current_active_core_op) || (nullptr != next_core_op), HAILO_INVALID_ARGUMENT); - - if (nullptr == current_active_core_op) { - // Activate first core-op - return next_core_op->activate_impl(batch_size); - } else if (nullptr == next_core_op) { - // Deactivate last core-op - return current_active_core_op->deactivate_impl(); - } else if (is_batch_switch) { - auto status = current_active_core_op->get_resources_manager()->enable_state_machine(batch_size); - CHECK_SUCCESS(status, "Failed to activate state-machine"); + CHECK((nullptr != current) || (nullptr != next), HAILO_INVALID_ARGUMENT); + + const auto start_time = std::chrono::steady_clock::now(); + + const bool is_batch_switch = (current == next) && current->get_resources_manager()->get_can_fast_batch_switch(); + if (is_batch_switch) { + CHECK_SUCCESS(fast_batch_switch(current, batch_size), "Failed to fast batch switch"); } else { - // We're switching from current_active_core_op to next_core_op. - // Deactivate the current core-op on the host, meaning the fw state machine won't be reset. - // This will be handled by activating the next core-op. - auto status = current_active_core_op->deactivate_host_resources(); - CHECK_SUCCESS(status, "Failed deactivating current core-op"); + CHECK_SUCCESS(switch_core_op(current, next, batch_size), "Failed to switch core-op"); + } + + const auto core_op_handle = next ? next->vdevice_core_op_handle() : INVALID_CORE_OP_HANDLE; + const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( + std::chrono::steady_clock::now() - start_time).count(); + TRACE(SwitchCoreOpTrace, device_id, core_op_handle, elapsed_time_ms); + + return HAILO_SUCCESS; +} + + +hailo_status VdmaConfigManager::deactivate_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op) +{ + static const uint16_t DEACTIVATE_BATCH_SIZE = 0; + const std::shared_ptr<VdmaConfigCoreOp> DEACTIVATE_NEXT_CORE_OP = nullptr; + return switch_core_op(current_active_core_op, DEACTIVATE_NEXT_CORE_OP, DEACTIVATE_BATCH_SIZE); +} + +hailo_status VdmaConfigManager::set_state_machine(std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size) +{ + // TODO: HRT-13253 - don't use the resources manager; instead call m_vdma_device directly. The device should store + // the current active core op. + if (next != nullptr) { + CHECK_SUCCESS(next->get_resources_manager()->enable_state_machine(batch_size), "Failed to enable state machine"); + // When switching network groups, the FW switch to the next NG does not mark the current NG as deactivated,
+ // so we explicitly mark it as deactivated here. + if ((current != nullptr) && (current != next)) { + current->get_resources_manager()->set_is_activated(false); + } + } else { + assert(current != nullptr); + CHECK_SUCCESS(current->get_resources_manager()->reset_state_machine(), "Failed to disable state machine"); + } + return HAILO_SUCCESS; +} + +hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, const uint16_t batch_size) +{ + assert((nullptr != current) || (nullptr != next)); + + if (current != nullptr) { + CHECK_SUCCESS(current->deactivate_host_resources(), "Failed deactivating host resources for current core-op"); // TODO: In mercury we need to reset after deactivate. This will be fixed in MSW-762 and the "if" will be removed // when we make the nn_manager responsible to reset the nn-core. - if (Device::Type::INTEGRATED == current_active_core_op->get_resources_manager()->get_device().get_type()) { - status = current_active_core_op->get_resources_manager()->reset_state_machine(); - CHECK_SUCCESS(status, "Failed to reset state machine in switch core-op"); + if (Device::Type::INTEGRATED == current->get_resources_manager()->get_device().get_type()) { + CHECK_SUCCESS(current->get_resources_manager()->reset_state_machine(), "Failed to reset state machine in switch core-op"); } + } - // Switch from the current core-op to the next core-op. I.e. current core-op will be deactivated and - // next core-op will be activated - status = next_core_op->activate_impl(batch_size); - CHECK_SUCCESS(status, "Failed activating next core-op"); + CHECK_SUCCESS(set_state_machine(current, next, batch_size), "Failed to set state machine"); + + // Activate next core op resources + if (next != nullptr) { + CHECK_SUCCESS(next->activate_host_resources(), "Failed activating host resources for next core-op"); + } - // Current core-op is now deactivated (we are not on batch switch), so we can cancel pending transfers.
- status = current_active_core_op->cancel_pending_transfers(); - CHECK_SUCCESS(status, "Failed canceling pending transfers from previous core-op"); + if (current != nullptr) { + CHECK_SUCCESS(current->cancel_pending_transfers(), "Failed canceling pending transfers from previous core-op"); } return HAILO_SUCCESS; } -hailo_status VdmaConfigManager::deactivate_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op) +hailo_status VdmaConfigManager::fast_batch_switch(std::shared_ptr<VdmaConfigCoreOp> current, const uint16_t batch_size) { - static const uint16_t DEACTIVATE_BATCH_SIZE = 0; - const std::shared_ptr<VdmaConfigCoreOp> DEACTIVATE_NEXT_CORE_OP = nullptr; - static const bool IS_NOT_BATCH_SWITCH = false; - return switch_core_op(current_active_core_op, DEACTIVATE_NEXT_CORE_OP, DEACTIVATE_BATCH_SIZE, IS_NOT_BATCH_SWITCH); + assert(nullptr != current); + return set_state_machine(current, current, batch_size); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.hpp b/hailort/libhailort/src/vdma/vdma_config_manager.hpp index fc13c368..6045f6ef 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_manager.hpp @@ -26,10 +26,17 @@ class VdmaConfigManager final public: VdmaConfigManager() = delete; - static hailo_status switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op, - std::shared_ptr<VdmaConfigCoreOp> next_core_op, const uint16_t batch_size, const bool is_batch_switch); - + static hailo_status set_core_op(const std::string &device_id, std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size); static hailo_status deactivate_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op); + +private: + static hailo_status set_state_machine(std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size); + + static hailo_status switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size); + static hailo_status fast_batch_switch(std::shared_ptr<VdmaConfigCoreOp> current, uint16_t batch_size); }; } /* namespace hailort */
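For readers following the refactor, the ordering that set_core_op/switch_core_op now enforce can be summarized as a standalone toy (not HailoRT code; the printfs stand in for the real calls): tear down the current op's host resources, flip the FW state machine, bring up the next op's host resources, then cancel whatever the old op still had in flight.

    #include <cstdio>

    struct ToyCoreOp { const char *name; };

    static void switch_core_op(ToyCoreOp *current, ToyCoreOp *next) {
        if (current) printf("deactivate host resources: %s\n", current->name);
        if (next)    printf("enable state machine for:  %s\n", next->name);
        else         printf("reset state machine\n");
        if (next)    printf("activate host resources:   %s\n", next->name);
        if (current) printf("cancel pending transfers:  %s\n", current->name);
    }

    int main() {
        ToyCoreOp a{"op_a"}, b{"op_b"};
        switch_core_op(&a, &b);      // switch between two core-ops
        switch_core_op(&b, nullptr); // deactivate the last one
        return 0;
    }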
diff --git a/hailort/libhailort/src/vdma/vdma_device.cpp b/hailort/libhailort/src/vdma/vdma_device.cpp index c06c3b07..026a3e9a 100644 --- a/hailort/libhailort/src/vdma/vdma_device.cpp +++ b/hailort/libhailort/src/vdma/vdma_device.cpp @@ -11,13 +11,16 @@ #include "vdma/vdma_device.hpp" #include "vdma/memory/descriptor_list.hpp" -#include "vdma/memory/mapping_manager.hpp" #include "vdma/vdma_config_manager.hpp" #include "vdma/pcie/pcie_device.hpp" #include "vdma/integrated/integrated_device.hpp" #include "device_common/control.hpp" +#include "device_common/device_internal.hpp" #include "core_op/resource_manager/resource_manager_builder.hpp" #include "core_op/core_op.hpp" +#include "common/os_utils.hpp" +#include "utils/buffer_storage.hpp" +#include "hef/hef_internal.hpp" #include #include @@ -35,7 +38,6 @@ static constexpr std::chrono::milliseconds DEFAULT_TIMEOUT(50000); VdmaDevice::VdmaDevice(std::unique_ptr<HailoRTDriver> &&driver, Device::Type type) : DeviceBase::DeviceBase(type), m_driver(std::move(driver)), - m_mapping_manager(*m_driver), m_is_configured(false) { activate_notifications(get_dev_id()); @@ -144,9 +146,10 @@ Expected<ConfiguredNetworkGroupVector> VdmaDevice::add_hef(Hef &hef, const Netwo CHECK_SUCCESS_AS_EXPECTED(status); assert(nullptr == m_vdma_interrupts_dispatcher); - auto interrupts_dispatcher = vdma::InterruptsDispatcher::create(std::ref(*m_driver)); - CHECK_EXPECTED(interrupts_dispatcher); - m_vdma_interrupts_dispatcher = interrupts_dispatcher.release(); + TRY(m_vdma_interrupts_dispatcher, vdma::InterruptsDispatcher::create(std::ref(*m_driver))); + + assert(nullptr == m_vdma_transfer_launcher); + TRY(m_vdma_transfer_launcher, vdma::TransferLauncher::create()); m_is_configured = true; } @@ -173,7 +176,8 @@ Expected<std::shared_ptr<ConfiguredNetworkGroup>> VdmaDevice::create_configured_ /* build HEF supported features */ auto resource_manager = ResourcesManagerBuilder::build(current_core_op_index, *this, get_driver(), config_params, core_op_metadata, static_cast<HEFHwArch>(hef.pimpl->get_device_arch()), + hef.pimpl->get_shef_file_handle()); CHECK_EXPECTED(resource_manager); @@ -194,7 +198,6 @@ Expected<std::shared_ptr<ConfiguredNetworkGroup>> VdmaDevice::create_configured_ core_ops.emplace_back(core_op_ptr); m_core_ops.emplace_back(core_op_ptr); - // TODO: HRT-8875 auto metadata = hef.pimpl->network_group_metadata(core_op_metadata->core_op_name()); auto network_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops), std::move(metadata)); CHECK_EXPECTED(network_group_expected); @@ -225,6 +228,17 @@ hailo_reset_device_mode_t VdmaDevice::get_default_reset_mode() return HAILO_RESET_DEVICE_MODE_SOFT; } +// TODO - HRT-13234, move to DeviceBase +void VdmaDevice::shutdown_core_ops() +{ + for (auto core_op : m_core_ops) { + auto status = core_op->shutdown(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to shutdown core op with status {}", status); + } + } +} + hailo_status VdmaDevice::mark_as_used() { return m_driver->mark_as_used(); @@ -236,6 +250,12 @@ ExpectedRef<vdma::InterruptsDispatcher> VdmaDevice::get_vdma_interrupts_dispatch return std::ref(*m_vdma_interrupts_dispatcher); } +ExpectedRef<vdma::TransferLauncher> VdmaDevice::get_vdma_transfer_launcher() +{ + CHECK_AS_EXPECTED(m_vdma_transfer_launcher, HAILO_INTERNAL_FAILURE, "vDMA transfer launcher wasn't created"); + return std::ref(*m_vdma_transfer_launcher); +} + VdmaDevice::~VdmaDevice() { auto status = stop_notification_fetch_thread(); @@ -250,20 +270,50 @@ VdmaDevice::~VdmaDevice() } } -hailo_status VdmaDevice::dma_map(void *address, size_t size, hailo_stream_direction_t direction) +static std::pair<void *, size_t> aligned_part_to_map(void *original, size_t size) { - return m_mapping_manager.map_buffer(address, size, direction); + const auto dma_alignment = OsUtils::get_dma_able_alignment(); + const auto aligned_address = HailoRTCommon::align_to(original, dma_alignment); + const auto unaligned_part = reinterpret_cast<uintptr_t>(aligned_address) - reinterpret_cast<uintptr_t>(original); + const auto aligned_size = size > unaligned_part ? size - unaligned_part : 0; + return std::make_pair(aligned_address, aligned_size); }
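A worked example of what aligned_part_to_map computes, as a standalone check (not HailoRT code; a 4 KB DMA alignment is assumed for the illustration):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
        const uintptr_t alignment = 0x1000;  // assumed DMA alignment
        const uintptr_t original = 0x10403;  // unaligned user address
        const size_t size = 0x3000;          // 12 KB buffer
        // Round the address up to the next alignment boundary.
        const uintptr_t aligned = (original + alignment - 1) & ~(alignment - 1); // 0x11000
        const size_t unaligned_part = aligned - original;                        // 0xBFD bytes skipped
        const size_t aligned_size = size > unaligned_part ? size - unaligned_part : 0;
        assert(unaligned_part == 0xBFD && aligned_size == 0x2403);
        // The skipped head is handled via an already-mapped bounce buffer (see dma_map below).
        return 0;
    }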
-hailo_status VdmaDevice::dma_unmap(void *address, hailo_stream_direction_t direction) +hailo_status VdmaDevice::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t data_direction) { - return m_mapping_manager.unmap_buffer(address, direction); + // Since we can't map unaligned addresses (to dma alignment), we map only the aligned part of the buffer. The other + // unaligned part will be copied into some bounce buffer (which is already mapped). + std::tie(address, size) = aligned_part_to_map(address, size); + + if (size == 0) { + // The aligned part is not in range (Can happen when the buffer is smaller than the dma alignment), nothing to + // map. + return HAILO_SUCCESS; + } + + // Find buffer_identifier if registered to BufferStorageResourceManager. + auto buffer_identifier = HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER; + if (auto storage = BufferStorageResourceManager::get_resource(std::make_pair(address, size))) { + TRY(const auto buffer, storage->get()->get_dma_able_buffer()); + buffer_identifier = buffer->buffer_identifier(); + } + + CHECK_EXPECTED(m_driver->vdma_buffer_map(address, size, to_hailo_driver_direction(data_direction), buffer_identifier)); + return HAILO_SUCCESS; } -Expected<std::pair<vdma::MappedBufferPtr, bool>> VdmaDevice::try_dma_map(vdma::DmaAbleBufferPtr buffer, - hailo_stream_direction_t direction) +hailo_status VdmaDevice::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t data_direction) { - return m_mapping_manager.try_dma_map(buffer, direction); + // Since we can't map unaligned addresses (to dma alignment), we map only the aligned part of the buffer. The other + // unaligned part will be copied into some bounce buffer (which is already mapped). + std::tie(address, size) = aligned_part_to_map(address, size); + if (size == 0) { + // The aligned part is not in range (Can happen when the buffer is smaller than the dma alignment), nothing to + // map. + return HAILO_SUCCESS; + } + + return m_driver->vdma_buffer_unmap(address, size, to_hailo_driver_direction(data_direction)); } Expected<ConfiguredNetworkGroupVector> VdmaDevice::create_networks_group_vector(Hef &hef, const NetworkGroupsParamsMap &configure_params) diff --git a/hailort/libhailort/src/vdma/vdma_device.hpp b/hailort/libhailort/src/vdma/vdma_device.hpp index 30dc64ee..105e6d84 100644 --- a/hailort/libhailort/src/vdma/vdma_device.hpp +++ b/hailort/libhailort/src/vdma/vdma_device.hpp @@ -17,8 +17,8 @@ #include "device_common/device_internal.hpp" #include "network_group/network_group_internal.hpp" #include "vdma/channel/interrupts_dispatcher.hpp" -#include "vdma/memory/mapping_manager.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/channel/transfer_launcher.hpp" +#include "vdma/driver/hailort_driver.hpp" namespace hailort @@ -32,6 +32,7 @@ class VdmaDevice : public DeviceBase { virtual hailo_status wait_for_wakeup() override; virtual void increment_control_sequence() override; + virtual void shutdown_core_ops() override; virtual hailo_reset_device_mode_t get_default_reset_mode() override; hailo_status mark_as_used(); virtual Expected<size_t> read_log(MemoryView &buffer, hailo_cpu_id_t cpu_id) override; @@ -48,11 +49,10 @@ class VdmaDevice : public DeviceBase { }; ExpectedRef<vdma::InterruptsDispatcher> get_vdma_interrupts_dispatcher(); + ExpectedRef<vdma::TransferLauncher> get_vdma_transfer_launcher(); - virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction) override; - virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction) override; - virtual Expected<std::pair<vdma::MappedBufferPtr, bool>> try_dma_map(vdma::DmaAbleBufferPtr buffer, - hailo_stream_direction_t direction) override; + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; protected: VdmaDevice(std::unique_ptr<HailoRTDriver> &&driver, Type type); @@ -63,16 +63,15 @@ class VdmaDevice : public DeviceBase { uint8_t *response_buffer, size_t *response_size, hailo_cpu_id_t cpu_id) override; virtual Expected<ConfiguredNetworkGroupVector> add_hef(Hef &hef, const NetworkGroupsParamsMap &configure_params) override; - // Initialization dependency: MappingManager holds dma mappings for all buffers relative to this device!
- // (CoreOp for example holds streams with mapped buffers) std::unique_ptr<HailoRTDriver> m_driver; - vdma::MappingManager m_mapping_manager; + // TODO - HRT-13234, move to DeviceBase std::vector<std::shared_ptr<CoreOp>> m_core_ops; std::vector<std::shared_ptr<ConfiguredNetworkGroupBase>> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context // The vdma interrupts dispatcher contains a callback with a reference to the current activated network group - // (reference to the ResourcesManager). Hence, it must be destructed before the networks groups are destructed. + // (reference to the ResourcesManager). Hence, it must be destroyed before the networks groups are destroyed. std::unique_ptr<vdma::InterruptsDispatcher> m_vdma_interrupts_dispatcher; + std::unique_ptr<vdma::TransferLauncher> m_vdma_transfer_launcher; ActiveCoreOpHolder m_active_core_op_holder; bool m_is_configured; diff --git a/hailort/libhailort/src/vdma/vdma_stream.cpp b/hailort/libhailort/src/vdma/vdma_stream.cpp index 6379ead3..8c324ad8 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.cpp +++ b/hailort/libhailort/src/vdma/vdma_stream.cpp @@ -11,6 +11,7 @@ #include "vdma/vdma_stream.hpp" #include "vdma/circular_stream_buffer_pool.hpp" #include "utils/profiler/tracer_macros.hpp" +#include "utils/buffer_storage.hpp" #include "common/os_utils.hpp" @@ -24,44 +25,53 @@ Expected<std::shared_ptr<VdmaInputStream>> VdmaInputStream::create(hailo_stream_ { assert((interface == HAILO_STREAM_INTERFACE_PCIE) || (interface == HAILO_STREAM_INTERFACE_INTEGRATED)); + TRY(auto bounce_buffers_pool, init_dma_bounce_buffer_pool(device, channel, edge_layer)); + hailo_status status = HAILO_UNINITIALIZED; auto result = make_shared_nothrow<VdmaInputStream>(device, channel, edge_layer, - core_op_activated_event, interface, status); + core_op_activated_event, interface, std::move(bounce_buffers_pool), status); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); return result; } -std::unique_ptr<StreamBufferPool> VdmaInputStream::init_dma_bounce_buffer_pool( - vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, hailo_status &status) +Expected<BounceBufferQueuePtr> VdmaInputStream::init_dma_bounce_buffer_pool( + VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer) { const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); const auto dma_bounce_buffer_pool_size = channel->get_max_ongoing_transfers( LayerInfoUtils::get_layer_transfer_size(edge_layer)); - // Checking status for base class c'tor - if (HAILO_SUCCESS != status) { - return nullptr; - } + const auto bounce_buffer_size = std::min( + static_cast<uint32_t>(dma_able_alignment), LayerInfoUtils::get_layer_transfer_size(edge_layer)); - // Initialize dma buffer pool for support for non-aligned user buffers - auto dma_queued_pool = QueuedStreamBufferPool::create(dma_bounce_buffer_pool_size, dma_able_alignment, - BufferStorageParams::create_dma()); - if (dma_queued_pool.status() != HAILO_SUCCESS) { - LOGGER__ERROR("Failed creating DMA bounce buffer pool with status {}", dma_queued_pool.status()); - status = dma_queued_pool.status(); - return nullptr; - } + auto bounce_buffers_pool = make_unique_nothrow<BounceBufferQueue>(dma_bounce_buffer_pool_size); + CHECK_NOT_NULL(bounce_buffers_pool, HAILO_OUT_OF_HOST_MEMORY); + + for (size_t i = 0; i < dma_bounce_buffer_pool_size; i++) { + TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_by_allocation(bounce_buffer_size, device.get_driver())); + + auto dma_storage = make_shared_nothrow<DmaStorage>(std::move(dma_able_buffer)); + CHECK_NOT_NULL(dma_storage, HAILO_OUT_OF_HOST_MEMORY); - return std::unique_ptr<StreamBufferPool>(dma_queued_pool.release()); + TRY(auto buffer, Buffer::create(std::move(dma_storage))); + TRY(auto mapping, DmaMappedBuffer::create(device, buffer.data(), buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D)); + + auto bounce_buffer = make_shared_nothrow<BounceBuffer>(BounceBuffer{std::move(buffer), std::move(mapping)}); + CHECK_NOT_NULL(bounce_buffer, HAILO_OUT_OF_HOST_MEMORY); + + CHECK_SUCCESS(bounce_buffers_pool->enqueue(std::move(bounce_buffer))); + } + return bounce_buffers_pool; }
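The sizing logic above allocates one bounce buffer per possible in-flight transfer, each only as large as the unaligned head it may need to absorb (capped by the frame size). A standalone check of that arithmetic (not HailoRT code; the alignment, frame size, and pool depth are illustrative):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
        const uint32_t dma_able_alignment = 4096; // assumed platform DMA alignment
        const uint32_t frame_size = 1500;         // example transfer size
        const size_t max_ongoing_transfers = 16;  // pool depth == max transfers in flight
        // A bounce buffer never needs to hold more than one alignment's worth of
        // head bytes, nor more than a whole frame.
        const uint32_t bounce_buffer_size = std::min(dma_able_alignment, frame_size); // 1500
        assert(bounce_buffer_size == 1500);
        assert(max_ongoing_transfers * bounce_buffer_size == 24000); // total pool bytes
        return 0;
    }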
Buffer::create(std::move(dma_storage))); + TRY(auto mapping, DmaMappedBuffer::create(device, buffer.data(), buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D)); + + auto bounce_buffer = make_shared_nothrow(BounceBuffer{std::move(buffer), std::move(mapping)}); + CHECK_NOT_NULL(bounce_buffer, HAILO_OUT_OF_HOST_MEMORY); + + CHECK_SUCCESS(bounce_buffers_pool->enqueue(std::move(bounce_buffer))); + } + return bounce_buffers_pool; } VdmaInputStream::VdmaInputStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, EventPtr core_op_activated_event, - hailo_stream_interface_t stream_interface, hailo_status &status) : + hailo_stream_interface_t stream_interface, BounceBufferQueuePtr &&bounce_buffers_pool, + hailo_status &status) : AsyncInputStreamBase(edge_layer, std::move(core_op_activated_event), status), m_device(device), - m_dma_bounce_buffer_pool(init_dma_bounce_buffer_pool(channel, edge_layer, status)), + m_bounce_buffers_pool(std::move(bounce_buffers_pool)), m_channel(std::move(channel)), m_interface(stream_interface), m_core_op_handle(INVALID_CORE_OP_HANDLE) @@ -96,11 +106,16 @@ void VdmaInputStream::set_vdevice_core_op_handle(vdevice_core_op_handle_t core_o Expected> VdmaInputStream::allocate_buffer_pool() { - auto circular_pool = CircularStreamBufferPool::create(m_device, HailoRTDriver::DmaDirection::H2D, - m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), get_frame_size()); - CHECK_EXPECTED(circular_pool); + TRY(auto circular_pool, CircularStreamBufferPool::create(m_device, HAILO_DMA_BUFFER_DIRECTION_H2D, + m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), get_frame_size())); + + // Bind the buffer to the channel to avoid the need to do it on every transfer. 
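A note on the pool construction in init_dma_bounce_buffer_pool above: every slot is allocated, wrapped, and DMA-mapped exactly once, and afterwards only recycled through the queue (dequeued when an unaligned write is launched, re-enqueued from its completion callback), so no allocation or mapping work happens per transfer. A minimal, generic sketch of that recycle pattern, using a mutex-guarded std::queue as a stand-in for HailoRT's SafeQueue and a hypothetical BounceSlot in place of BounceBuffer:

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <queue>
#include <vector>

// Hypothetical stand-in for one pre-allocated, pre-mapped bounce buffer.
struct BounceSlot {
    std::vector<uint8_t> data; // would be DMA-able, mapped memory in the real pool
};

class BouncePool {
public:
    BouncePool(size_t slots, size_t slot_size)
    {
        // All allocation (and, in the real code, DMA mapping) happens here, once.
        for (size_t i = 0; i < slots; i++) {
            auto slot = std::make_shared<BounceSlot>();
            slot->data.resize(slot_size);
            m_queue.push(std::move(slot));
        }
    }

    // Taken when an unaligned transfer is launched.
    std::shared_ptr<BounceSlot> dequeue()
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        if (m_queue.empty()) {
            return nullptr; // the real SafeQueue reports a status instead
        }
        auto slot = std::move(m_queue.front());
        m_queue.pop();
        return slot;
    }

    // Returned from the transfer-completion callback.
    void enqueue(std::shared_ptr<BounceSlot> slot)
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        m_queue.push(std::move(slot));
    }

private:
    std::mutex m_mutex;
    std::queue<std::shared_ptr<BounceSlot>> m_queue;
};
```

Sizing the pool to the channel's maximum ongoing transfers (as the real code does via get_max_ongoing_transfers()) means a free slot is available whenever another transfer may still be launched.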
+ TRY(auto pool_dma_able_buffer, circular_pool->get_base_buffer().storage().get_dma_able_buffer()); + TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared(pool_dma_able_buffer, m_device.get_driver(), + HailoRTDriver::DmaDirection::H2D)); + CHECK_SUCCESS(m_channel->bind_buffer(mapped_buffer)); - return std::unique_ptr(circular_pool.release()); + return std::unique_ptr(std::move(circular_pool)); } size_t VdmaInputStream::get_max_ongoing_transfers() const @@ -112,17 +127,10 @@ Expected VdmaInputStream::align_transfer_request(TransferReques { const auto dma_alignment = OsUtils::get_dma_able_alignment(); std::vector transfer_buffers; - TransferBuffer dma_able_bounce_buffer; - const auto buffer_address = transfer_request.transfer_buffers[0].base_buffer()->data(); + const auto buffer_address = transfer_request.transfer_buffers[0].base_buffer().data(); const auto buffer_size = transfer_request.transfer_buffers[0].size(); - { - std::unique_lock lock(m_dma_pool_mutex); - // Initialize dma able bounce buffer the size of alignment size to read pre alignment data - auto dma_able_bounce_buffer_exp = m_dma_bounce_buffer_pool->dequeue(); - CHECK_EXPECTED(dma_able_bounce_buffer_exp); - dma_able_bounce_buffer = dma_able_bounce_buffer_exp.release(); - } + TRY(const auto dma_able_bounce_buffer, m_bounce_buffers_pool->dequeue()); // If buffer size is larger than alignment size - will create bounce buffer for non aligned buffer part and then use // User's buffer from aligned address - otherwise will create bounce buffer size of user buffer and copy whole frame @@ -135,25 +143,20 @@ Expected VdmaInputStream::align_transfer_request(TransferReques const auto user_buffer_size = buffer_size - bounce_buffer_exact_size; // Create another transfer buffer with same base address but exact size for actual transfer - auto dma_able_exact_bounce_buffer = TransferBuffer(dma_able_bounce_buffer.base_buffer(), bounce_buffer_exact_size, 0); - memcpy((dma_able_exact_bounce_buffer.base_buffer())->data(), buffer_address, bounce_buffer_exact_size); + auto dma_able_exact_bounce_buffer = TransferBuffer(MemoryView(dma_able_bounce_buffer->buffer_storage), + bounce_buffer_exact_size, 0); + dma_able_exact_bounce_buffer.copy_from(MemoryView(buffer_address, bounce_buffer_exact_size)); transfer_buffers.emplace_back(dma_able_exact_bounce_buffer); - - auto dma_able_user_buffer = DmaStorage::create_dma_able_buffer_from_user_size( - reinterpret_cast(aligned_user_buffer_addr), user_buffer_size); - CHECK_EXPECTED(dma_able_user_buffer); - transfer_buffers.emplace_back(dma_able_user_buffer.release()); + transfer_buffers.emplace_back(MemoryView(reinterpret_cast(aligned_user_buffer_addr), user_buffer_size)); } else { - auto dma_able_exact_bounce_buffer = TransferBuffer(dma_able_bounce_buffer.base_buffer(), buffer_size, 0); - memcpy((dma_able_exact_bounce_buffer.base_buffer())->data(), buffer_address, buffer_size); + auto dma_able_exact_bounce_buffer = TransferBuffer(MemoryView(dma_able_bounce_buffer->buffer_storage), buffer_size, 0); + dma_able_exact_bounce_buffer.copy_from(MemoryView(buffer_address, buffer_size)); transfer_buffers.emplace_back(dma_able_exact_bounce_buffer); } - auto wrapped_callback = [user_callback=transfer_request.callback, dma_able_bounce_buffer, this](hailo_status callback_status) { - { - std::unique_lock lock(m_dma_pool_mutex); - m_dma_bounce_buffer_pool->enqueue(TransferBuffer{dma_able_bounce_buffer}); - } + auto wrapped_callback = [user_callback=transfer_request.callback, + 
dma_able_bounce_buffer=std::move(dma_able_bounce_buffer), this](hailo_status callback_status) mutable {
+        m_bounce_buffers_pool->enqueue(std::move(dma_able_bounce_buffer));
         user_callback(callback_status);
     };

@@ -163,18 +166,15 @@ Expected VdmaInputStream::align_transfer_request(TransferReques
 hailo_status VdmaInputStream::write_async_impl(TransferRequest &&transfer_request)
 {
     TRACE(FrameDequeueH2DTrace, m_device.get_dev_id(), m_core_op_handle, name());
-    const auto user_owns_buffer = (buffer_mode() == StreamBufferMode::NOT_OWNING);
     const auto dma_able_alignment = OsUtils::get_dma_able_alignment();
-    if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()) % dma_able_alignment == 0) {
-        return m_channel->launch_transfer(std::move(transfer_request), user_owns_buffer);
+    if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer().data()) % dma_able_alignment == 0) {
+        return m_channel->launch_transfer(std::move(transfer_request));
     } else {
         auto unaligned_transfer_request = align_transfer_request(std::move(transfer_request));
         CHECK_EXPECTED_AS_STATUS(unaligned_transfer_request);
-        return m_channel->launch_transfer(unaligned_transfer_request.release(), user_owns_buffer);
+        return m_channel->launch_transfer(unaligned_transfer_request.release());
     }
-
-    return HAILO_INTERNAL_FAILURE;
 }

 hailo_status VdmaInputStream::activate_stream_impl()
@@ -246,11 +246,16 @@ hailo_stream_interface_t VdmaOutputStream::get_interface() const
 Expected> VdmaOutputStream::allocate_buffer_pool()
 {
-    auto circular_pool = CircularStreamBufferPool::create(m_device, HailoRTDriver::DmaDirection::D2H,
-        m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), m_transfer_size);
-    CHECK_EXPECTED(circular_pool);
+    TRY(auto circular_pool, CircularStreamBufferPool::create(m_device, HAILO_DMA_BUFFER_DIRECTION_D2H,
+        m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), m_transfer_size));
+
+    // Bind the buffer to the channel to avoid the need to do it on every transfer.
+    TRY(auto pool_dma_able_buffer, circular_pool->get_base_buffer().storage().get_dma_able_buffer());
+    TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared(pool_dma_able_buffer, m_device.get_driver(),
+        HailoRTDriver::DmaDirection::D2H));
+    CHECK_SUCCESS(m_channel->bind_buffer(mapped_buffer));

-    return std::unique_ptr(circular_pool.release());
+    return std::unique_ptr(std::move(circular_pool));
 }

 size_t VdmaOutputStream::get_max_ongoing_transfers() const
@@ -260,18 +265,18 @@ size_t VdmaOutputStream::get_max_ongoing_transfers() const
 Expected VdmaOutputStream::align_transfer_request(TransferRequest &&transfer_request)
 {
-    auto aligned_bounce_buffer_exp = DmaStorage::create_dma_able_buffer_from_user_size(nullptr,
-        transfer_request.transfer_buffers[0].size());
-    CHECK_EXPECTED(aligned_bounce_buffer_exp);
-    auto aligned_bounce_buffer = aligned_bounce_buffer_exp.release();
+    // Allocate a bounce buffer and store it inside the lambda to keep it alive until it is no longer needed.
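The comment above names the ownership idiom used on both the input and output paths: the completion callback captures the bounce buffer by value, so the allocation stays alive for as long as the in-flight transfer can touch it, and is released (or re-enqueued) only after the callback runs. A minimal sketch of the idiom; submit_async here is a hypothetical stand-in for the channel's launch_transfer, invoked inline rather than from the interrupts dispatcher:

```cpp
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <vector>

using Callback = std::function<void(int /*status*/)>;

// Toy "async" submit: runs the callback inline. A real launch would store it
// and invoke it from a completion handler once the DMA transfer finishes.
static void submit_async(uint8_t * /*dma_dst*/, size_t /*size*/, Callback on_done)
{
    on_done(0);
}

// D2H-style read into an unaligned user buffer via a bounce buffer.
static void read_into_unaligned(uint8_t *user_buffer, size_t size)
{
    // Heap-allocate the bounce buffer; the by-value capture below keeps it
    // alive exactly until the completion callback returns.
    auto bounce = std::make_shared<std::vector<uint8_t>>(size);

    submit_async(bounce->data(), size, [bounce, user_buffer, size](int status) {
        if (0 == status) {
            // Copy the received frame back into the (unaligned) user buffer.
            std::memcpy(user_buffer, bounce->data(), size);
        }
        // `bounce` is released here, freeing the allocation.
    });
}
```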
+ auto bounce_buffer_exp = Buffer::create_shared(transfer_request.transfer_buffers[0].size(), BufferStorageParams::create_dma()); + CHECK_EXPECTED(bounce_buffer_exp); + auto bounce_buffer = bounce_buffer_exp.release(); auto wrapped_callback = [unaligned_user_buffer = transfer_request.transfer_buffers[0].base_buffer(), - aligned_bounce_buffer, user_callback = transfer_request.callback](hailo_status callback_status) { - memcpy(const_cast(unaligned_user_buffer->data()), aligned_bounce_buffer->data(), unaligned_user_buffer->size()); + bounce_buffer=bounce_buffer, user_callback=transfer_request.callback](hailo_status callback_status) { + memcpy(const_cast(unaligned_user_buffer.data()), bounce_buffer->data(), unaligned_user_buffer.size()); user_callback(callback_status); }; - return TransferRequest(std::move(aligned_bounce_buffer), wrapped_callback); + return TransferRequest(MemoryView(bounce_buffer->data(), bounce_buffer->size()), wrapped_callback); } hailo_status VdmaOutputStream::read_async_impl(TransferRequest &&transfer_request) @@ -285,18 +290,17 @@ hailo_status VdmaOutputStream::read_async_impl(TransferRequest &&transfer_reques original_callback(status); }; } - const auto user_owns_buffer = (buffer_mode() == StreamBufferMode::NOT_OWNING); const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); - if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()) % dma_able_alignment == 0) { - return m_channel->launch_transfer(std::move(transfer_request), user_owns_buffer); + if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer().data()) % dma_able_alignment == 0) { + return m_channel->launch_transfer(std::move(transfer_request)); } else { // In case of read unaligned - currently doesnt support using users buffer - so allocate complete new buffer size of user's buffer LOGGER__WARNING("read_async() was provided an unaligned buffer (address=0x{:x}), which causes performance degradation. 
Use buffers algined to {} bytes for optimal performance", - reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()), dma_able_alignment); + reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer().data()), dma_able_alignment); auto realigned_transfer_request = align_transfer_request(std::move(transfer_request)); CHECK_EXPECTED_AS_STATUS(realigned_transfer_request); - return m_channel->launch_transfer(realigned_transfer_request.release(), user_owns_buffer); + return m_channel->launch_transfer(realigned_transfer_request.release()); } } diff --git a/hailort/libhailort/src/vdma/vdma_stream.hpp b/hailort/libhailort/src/vdma/vdma_stream.hpp index c2203a45..b3962120 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.hpp +++ b/hailort/libhailort/src/vdma/vdma_stream.hpp @@ -20,6 +20,15 @@ namespace hailort { +struct BounceBuffer { + Buffer buffer_storage; + DmaMappedBuffer mapping; +}; +using BounceBufferPtr = std::shared_ptr; + +using BounceBufferQueue = SafeQueue; +using BounceBufferQueuePtr = std::unique_ptr; + class VdmaInputStream : public AsyncInputStreamBase { public: @@ -28,7 +37,8 @@ class VdmaInputStream : public AsyncInputStreamBase { EventPtr core_op_activated_event); VdmaInputStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, - EventPtr core_op_activated_event, hailo_stream_interface_t stream_interface, hailo_status &status); + EventPtr core_op_activated_event, hailo_stream_interface_t stream_interface, + BounceBufferQueuePtr &&bounce_buffers_pool, hailo_status &status); virtual ~VdmaInputStream(); virtual hailo_stream_interface_t get_interface() const override; @@ -42,16 +52,12 @@ class VdmaInputStream : public AsyncInputStreamBase { virtual hailo_status activate_stream_impl() override; virtual hailo_status deactivate_stream_impl() override; - static std::unique_ptr init_dma_bounce_buffer_pool(vdma::BoundaryChannelPtr channel, - const LayerInfo &edge_layer, hailo_status &status); + static Expected init_dma_bounce_buffer_pool(VdmaDevice &device, + vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer); Expected align_transfer_request(TransferRequest &&transfer_request); VdmaDevice &m_device; - - // Buffer pool for DMA able bounce buffers - // TODO HRT-12542- create new class for bounce buffers - std::mutex m_dma_pool_mutex; - std::unique_ptr m_dma_bounce_buffer_pool; + BounceBufferQueuePtr m_bounce_buffers_pool; vdma::BoundaryChannelPtr m_channel; const hailo_stream_interface_t m_interface; diff --git a/hailort/libhailort/tracer_profiler.proto b/hailort/libhailort/tracer_profiler.proto index 5b7e37b0..d4d27c20 100644 --- a/hailort/libhailort/tracer_profiler.proto +++ b/hailort/libhailort/tracer_profiler.proto @@ -18,7 +18,8 @@ message ProtoProfilerTopHeader { string os_ver = 5; string cpu_arch = 6; uint64 sys_ram_size = 7; //bytes - uint64 time_stamp_since_epoch =8; //nanosec + uint64 time_stamp_since_epoch = 8; //nanosec + ProtoProfilerPcieInfo pcie_info = 9; } message ProtoTraceMessage { @@ -27,10 +28,12 @@ message ProtoTraceMessage { ProtoProfilerAddStreamTrace added_stream = 2; ProtoProfilerAddCoreOpTrace added_core_op = 3; ProtoProfilerAddDeviceTrace added_device = 4; - ProtoProfilerSwitchedCoreOpTrace switched_core_op = 5; + ProtoProfilerActivateCoreOpTrace activate_core_op = 5; ProtoProfilerFrameEnqueueTrace frame_enqueue = 6; ProtoProfilerFrameDequeueTrace frame_dequeue = 7; ProtoProfilerCoreOpSwitchDecision switch_core_op_decision = 8; + ProtoProfilerDeactivateCoreOpTrace deactivate_core_op = 9; 
+ ProtoProfilerLoadedHefTrace loaded_hef = 10; } } @@ -89,11 +92,18 @@ message ProtoProfilerCoreOpSwitchDecision { bool switch_because_idle = 5; } -message ProtoProfilerSwitchedCoreOpTrace { +message ProtoProfilerActivateCoreOpTrace { uint64 time_stamp = 1; // nanosec int32 new_core_op_handle = 2; - string core_op_name = 3; - string device_id = 4; + string device_id = 3; + double duration = 4; //millisec +} + +message ProtoProfilerDeactivateCoreOpTrace { + uint64 time_stamp = 1; // nanosec + int32 core_op_handle = 2; + string device_id = 3; + double duration = 4; //millisec } // Low level streams adding @@ -121,3 +131,15 @@ message ProtoProfilerAddDeviceTrace { uint64 time_stamp = 1; // nanosec ProtoProfilerDeviceInfo device_info = 2; } + +message ProtoProfilerPcieInfo { + string lanes = 1; + string gen = 2; +} + +message ProtoProfilerLoadedHefTrace { + uint64 time_stamp = 1; // nanosec + string hef_name = 2; + string dfc_version = 3; + bytes hef_md5 = 4; +} diff --git a/hailort/prepare_externals/CMakeLists.txt b/hailort/prepare_externals/CMakeLists.txt index d73f9bd5..6166c4ce 100644 --- a/hailort/prepare_externals/CMakeLists.txt +++ b/hailort/prepare_externals/CMakeLists.txt @@ -11,6 +11,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/json.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/dotwriter.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/benchmark.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/readerwriterqueue.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/eigen.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/cli11.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/protobuf.cmake) if(HAILO_BUILD_SERVICE) diff --git a/hailort/rpc/hailort_rpc.proto b/hailort/rpc/hailort_rpc.proto index b0c9dab8..a95d88ca 100644 --- a/hailort/rpc/hailort_rpc.proto +++ b/hailort/rpc/hailort_rpc.proto @@ -45,6 +45,7 @@ service ProtoHailoRtRpc { rpc ConfiguredNetworkGroup_set_nms_score_threshold(ConfiguredNetworkGroup_set_nms_score_threshold_Request) returns (ConfiguredNetworkGroup_set_nms_score_threshold_Reply) {} rpc ConfiguredNetworkGroup_set_nms_iou_threshold(ConfiguredNetworkGroup_set_nms_iou_threshold_Request) returns (ConfiguredNetworkGroup_set_nms_iou_threshold_Reply) {} rpc ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request) returns (ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply) {} + rpc ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request) returns (ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply) {} rpc InputVStreams_create (VStream_create_Request) returns (VStreams_create_Reply) {} @@ -79,6 +80,7 @@ service ProtoHailoRtRpc { rpc OutputVStream_set_nms_score_threshold (VStream_set_nms_score_threshold_Request) returns (VStream_set_nms_score_threshold_Reply) {} rpc OutputVStream_set_nms_iou_threshold (VStream_set_nms_iou_threshold_Request) returns (VStream_set_nms_iou_threshold_Reply) {} rpc OutputVStream_set_nms_max_proposals_per_class (VStream_set_nms_max_proposals_per_class_Request) returns (VStream_set_nms_max_proposals_per_class_Reply) {} + rpc OutputVStream_set_nms_max_accumulated_mask_size (VStream_set_nms_max_accumulated_mask_size_Request) returns (VStream_set_nms_max_accumulated_mask_size_Reply) {} } message empty {} @@ -117,7 +119,6 @@ message ProtoTransferRequest { string stream_name = 1; uint32 direction = 2; bytes data = 3; - uint32 size = 4; uint32 
cb_idx = 5; } @@ -301,7 +302,7 @@ message ProtoNamedNetworkParams { message ProtoNmsShape { uint32 number_of_classes = 1; uint32 max_bbox_per_class = 2; - uint32 max_mask_size = 3; + uint32 max_accumulated_mask_size = 3; } message ProtoVStreamInfo { @@ -447,7 +448,8 @@ message ProtoYoloxPostProcessConfig { message ProtoYoloV5SegPostProcessConfig { double mask_threshold = 1; - string layer_name = 2; + uint32 max_accumulated_mask_size = 2; + string layer_name = 3; } message ProtoOpMetadata { @@ -773,6 +775,16 @@ message ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply { uint32 status = 1; } +message ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; + string edge_name = 2; + uint32 max_accumulated_mask_size = 3; +} + +message ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply { + uint32 status = 1; +} + message ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; string vstream_name = 2; @@ -951,4 +963,13 @@ message VStream_set_nms_max_proposals_per_class_Request { message VStream_set_nms_max_proposals_per_class_Reply { uint32 status = 1; +} + +message VStream_set_nms_max_accumulated_mask_size_Request { + ProtoVStreamIdentifier identifier = 1; + uint32 max_accumulated_mask_size = 2; +} + +message VStream_set_nms_max_accumulated_mask_size_Reply { + uint32 status = 1; } \ No newline at end of file diff --git a/hailort/scripts/download_firmware_eth.cmd b/hailort/scripts/download_firmware_eth.cmd index 593b252e..45446bc2 100644 --- a/hailort/scripts/download_firmware_eth.cmd +++ b/hailort/scripts/download_firmware_eth.cmd @@ -2,7 +2,7 @@ @ECHO OFF set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.16.2 +set HRT_VERSION=4.17.0 set FW_DIR=Hailo8/%HRT_VERSION%/FW set FW=hailo8_fw.%HRT_VERSION%_eth.bin diff --git a/hailort/scripts/download_firmware_eth.sh b/hailort/scripts/download_firmware_eth.sh index c2104ead..53b776ce 100755 --- a/hailort/scripts/download_firmware_eth.sh +++ b/hailort/scripts/download_firmware_eth.sh @@ -2,7 +2,7 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.16.2 +readonly HRT_VERSION=4.17.0 readonly FW_AWS_DIR="Hailo8/${HRT_VERSION}/FW" readonly FW="hailo8_fw.${HRT_VERSION}_eth.bin" diff --git a/hailort/scripts/download_hefs.cmd b/hailort/scripts/download_hefs.cmd index 663ffcc3..448e47db 100644 --- a/hailort/scripts/download_hefs.cmd +++ b/hailort/scripts/download_hefs.cmd @@ -1,11 +1,11 @@ :: cmd @ECHO OFF set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.16.2 +set HRT_VERSION=4.17.0 set REMOTE_HEF_DIR=Hailo8/%HRT_VERSION%/HEFS set LOCAL_EXAMPLES_HEF_DIR=..\libhailort\examples\hefs set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\hailo_tutorials\hefs -set EXAMPLES_HEFS=(multi_network_shortcut_net.hef shortcut_net.hef) +set EXAMPLES_HEFS=(multi_network_shortcut_net.hef shortcut_net.hef shortcut_net_nv12.hef) set TUTORIALS_HEFS=(resnet_v1_18.hef shortcut_net.hef) if not exist %LOCAL_EXAMPLES_HEF_DIR% mkdir %LOCAL_EXAMPLES_HEF_DIR% diff --git a/hailort/scripts/download_hefs.sh b/hailort/scripts/download_hefs.sh index fe7af3ba..6019b38a 100755 --- a/hailort/scripts/download_hefs.sh +++ b/hailort/scripts/download_hefs.sh @@ -2,12 +2,13 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.16.2 +readonly HRT_VERSION=4.17.0 readonly 
REMOTE_HEF_DIR="Hailo8/${HRT_VERSION}/HEFS" readonly LOCAL_EXAMPLES_HEF_DIR="../libhailort/examples/hefs" readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/hailo_tutorials/hefs" readonly EXAMPLES_HEFS=( "shortcut_net.hef" + "shortcut_net_nv12.hef" "multi_network_shortcut_net.hef" ) readonly TUTORIALS_HEFS=( diff --git a/hailort/tools/hailo15-scripts/hailo15_env_vars.sh b/hailort/tools/hailo15-scripts/hailo15_env_vars.sh deleted file mode 100644 index 2756ad19..00000000 --- a/hailort/tools/hailo15-scripts/hailo15_env_vars.sh +++ /dev/null @@ -1,8 +0,0 @@ -#! /bin/bash -set -e - -# Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -local_platform_sw_path="$script_directory"/../../../ -h15="10.0.0.1" -ssh-copy-id root@$h15 \ No newline at end of file diff --git a/hailort/tools/hailo15-scripts/load_driver.sh b/hailort/tools/hailo15-scripts/load_driver.sh deleted file mode 100755 index 83fa1012..00000000 --- a/hailort/tools/hailo15-scripts/load_driver.sh +++ /dev/null @@ -1,13 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -./install.sh comp build_integrated_nnc_driver -path="$local_platform_sw_path"/hailort/drivers/linux/integrated_nnc/hailo_integrated_nnc.ko -scp $path root@$h15:/lib/modules/5.15.32-yocto-standard/kernel/drivers/misc/hailo_integrated_nnc.ko - -ssh root@$h15 "modprobe -r hailo_integrated_nnc && modprobe hailo_integrated_nnc" diff --git a/hailort/tools/hailo15-scripts/load_firmware.sh b/hailort/tools/hailo15-scripts/load_firmware.sh deleted file mode 100755 index c5686308..00000000 --- a/hailort/tools/hailo15-scripts/load_firmware.sh +++ /dev/null @@ -1,11 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -./install.sh comp build_fw --fw vpu --hw-arch hailo15 -scp firmware/vpu_firmware/build/hailo15_nnc_fw.bin root@$h15:/lib/firmware/hailo/hailo15_nnc_fw.bin -ssh root@$h15 "modprobe -r hailo_integrated_nnc && modprobe hailo_integrated_nnc" diff --git a/hailort/tools/hailo15-scripts/load_hrt.sh b/hailort/tools/hailo15-scripts/load_hrt.sh deleted file mode 100755 index e85594b8..00000000 --- a/hailort/tools/hailo15-scripts/load_hrt.sh +++ /dev/null @@ -1,15 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -build_config=release -./build.sh -n pG -aaarch64 -b$build_config install - -scp lib/linux.aarch64.$build_config/libhailort.* root@$h15:/usr/lib/ -scp bin/linux.aarch64.$build_config/hailortcli root@$h15:/usr/bin/ -scp bin/linux.aarch64.$build_config/debalex root@$h15:/usr/bin/ -scp bin/linux.aarch64.$build_config/board_tests root@$h15:/usr/bin/ diff --git a/hailort/tools/hailo15-scripts/load_pcr.sh b/hailort/tools/hailo15-scripts/load_pcr.sh deleted file mode 100755 index 4123852e..00000000 --- a/hailort/tools/hailo15-scripts/load_pcr.sh +++ /dev/null @@ -1,12 +0,0 @@ -#! 
/bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -# Compile PCR -./install.sh comp build_infra_tools --arch aarch64 --build-hailort --build-type release - -scp platform_internals/hailo_platform_internals/low_level_tools/build/linux.aarch64.release/pcr/pcr root@$h15:/usr/bin/ diff --git a/hailort/tools/hailo15-scripts/read_log.sh b/hailort/tools/hailo15-scripts/read_log.sh deleted file mode 100755 index 398e00e9..00000000 --- a/hailort/tools/hailo15-scripts/read_log.sh +++ /dev/null @@ -1,15 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -source hailo_platform_venv/bin/activate -ssh root@$h15 "hailortcli fw-logger /tmp/fw_log.dat" -scp root@$h15:/tmp/fw_log.dat /tmp -ssh root@$h15 "rm /tmp/fw_log.dat" - -python ./platform_internals/hailo_platform_internals/tools/firmware/tracelog_parser_tool/tracelog_parser_tool/parse_tracelog.py --fw vpu --core-log-entries firmware/vpu_firmware/build/hailo15_nnc_fw_*_log_entries.csv --core-only --raw-input-file /tmp/fw_log.dat - diff --git a/hailort/tools/hailo15-scripts/sanity_infer.sh b/hailort/tools/hailo15-scripts/sanity_infer.sh deleted file mode 100755 index 03935493..00000000 --- a/hailort/tools/hailo15-scripts/sanity_infer.sh +++ /dev/null @@ -1,8 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -ssh root@$h15 "hailortcli run /etc/hailo/hefs/hailo15/shortcut_net/28_28_3/shortcut_net.hef -c 1" diff --git a/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh b/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh deleted file mode 100755 index 4e8c93d6..00000000 --- a/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh +++ /dev/null @@ -1,23 +0,0 @@ -#! 
/bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -# Build hailo15 artifacts -/bin/bash "$script_directory"/load_hrt.sh - -# Build hailo15 PCR -/bin/bash "$script_directory"/load_pcr.sh - -# Build hailo15 fw -cd $local_platform_sw_path -./install.sh comp build_fw --fw vpu --hw-arch hailo15 -scp firmware/vpu_firmware/build/hailo15_nnc_fw.bin root@$h15:/lib/firmware/hailo/hailo15_nnc_fw.bin - -# Build integrated_nnc (hailo15) driver -/bin/bash "$script_directory"/load_driver.sh - -# Run sanity infer -/bin/bash "$script_directory"/sanity_infer.sh diff --git a/hailort/tools/hw_debug/CMakeLists.txt b/hailort/tools/hw_debug/CMakeLists.txt deleted file mode 100644 index 5cdfa811..00000000 --- a/hailort/tools/hw_debug/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) - -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/cli11.cmake) - -set(FILES - main.cpp - shell.cpp - readline_wrapper.cpp - driver_memory.cpp - memory_commands.cpp - hailo15_fields.cpp - - # Depends on hailort_driver and its dependencies - ${HAILO_OS_DIR}/hailort_driver.cpp - ${HAILO_OS_DIR}/file_descriptor.cpp - ${HAILO_FULL_OS_DIR}/driver_scan.cpp -) - -if(WIN32) - # hailort_driver.cpp in windows depends on string_conversion - set(FILES ${FILES} - ${HAILORT_COMMON_OS_DIR}/string_conversion.cpp) -endif() - -add_executable(debalex ${FILES}) -target_compile_options(debalex PRIVATE ${HAILORT_COMPILE_OPTIONS}) -set_property(TARGET debalex PROPERTY CXX_STANDARD 14) -target_link_libraries(debalex PRIVATE - libhailort - spdlog::spdlog - CLI11::CLI11 - ) -target_include_directories(debalex - PRIVATE - ${HAILORT_COMMON_DIR} - ${HAILORT_SRC_DIR} - ${DRIVER_INC_DIR} -) - -if(CMAKE_SYSTEM_NAME STREQUAL QNX) - target_link_libraries(debalex PRIVATE pci) -endif() - -find_path(READLINE_INCLUDE_DIR NAMES readline/readline.h) -find_library(READLINE_LIBRARY NAMES readline) - -if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY) - target_link_libraries(debalex PRIVATE ${READLINE_LIBRARY}) - target_include_directories(debalex PRIVATE ${READLINE_INCLUDE_DIR}) - add_definitions(-DUSE_READLINE) -else() - message(WARNING "Could not find readline library. To better UI, please install it by calling `sudo apt install libreadline6-dev`") -endif() - -install(TARGETS debalex - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) -cli11_install_completion_file(debalex) \ No newline at end of file diff --git a/hailort/tools/hw_debug/driver_memory.cpp b/hailort/tools/hw_debug/driver_memory.cpp deleted file mode 100644 index 83d34396..00000000 --- a/hailort/tools/hw_debug/driver_memory.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/** - * @file driver_memory.cpp - * @brief Implements MemorySource over HailoRT driver, reads/write all interfaces. 
- */ - -#include "driver_memory.hpp" -#include "hailo15_fields.hpp" - -DriverMemorySource::DriverMemorySource(std::shared_ptr driver, HailoRTDriver::MemoryType memory_type) : - m_driver(driver), - m_memory_type(memory_type) -{} - -hailo_status DriverMemorySource::read(uint64_t offset, uint8_t *data, size_t size) -{ - return m_driver->read_memory(m_memory_type, offset, data, size); -} - -hailo_status DriverMemorySource::write(uint64_t offset, const uint8_t *data, size_t size) -{ - return m_driver->write_memory(m_memory_type, offset, data, size); -} - -size_t DriverMemorySource::total_size() const -{ - // TODO HRT-7984: return the actual size - return std::numeric_limits::max(); -} - - -static constexpr size_t VDMA_CHANNELS_COUNT = 32; -static constexpr size_t VDMA_H2D_CHANNELS_COUNT = 16; - -#pragma pack(push, 1) -struct VdmaDataPerDirection { - // Control - uint64_t start_abort : 1; - uint64_t pause_resume : 1; - uint64_t abort_on_err : 1; - uint64_t reserved0 : 2; - uint64_t irq_on_err : 1; - uint64_t irq_on_host : 1; - uint64_t irq_on_device : 1; - - // Depth id - uint64_t id : 3; - uint64_t depth : 4; - uint64_t reserved1 : 1; - - uint64_t num_available : 16; - uint64_t num_processed : 16; - uint64_t num_ongoing : 16; - - uint64_t error : 8; - uint64_t reserved2 : 8; - uint64_t desc_address : 48; -}; -static_assert(0x10 == sizeof(VdmaDataPerDirection), "Invalid VdmaDataPerDirection size"); - -struct VdmaChannelData { - VdmaDataPerDirection src; - VdmaDataPerDirection dest; -}; -#pragma pack(pop) - -class VdmaChannelField : public Field { -public: - VdmaChannelField() : - Field("channel", "vDMA channel register") - {} - - virtual size_t elements_count() const - { - return VDMA_CHANNELS_COUNT; - }; - - virtual std::string print_element(MemorySource& memory, size_t index) const - { - assert(index < elements_count()); - VdmaChannelData data{}; - auto status = memory.read(index * sizeof(data), reinterpret_cast(&data), sizeof(data)); - if (HAILO_SUCCESS != status) { - throw std::runtime_error(fmt::format("Failed reading memory, status {}", status)); - } - - return fmt::format("channel[{}] (offset=0x{:X} size=0x{:X} type= {}):\n", index, index * sizeof(data), sizeof(data), - index < VDMA_H2D_CHANNELS_COUNT ? "H2D" : "D2H") + - fmt::format(" Src status: {}\n", print_src_status(data.src)) + - fmt::format(" Dest status: {}\n", print_dest_status(data.dest)) + - fmt::format(" Src: {}\n", print_direction(data.src)) + - fmt::format(" Dest: {}\n", print_direction(data.dest)); - } - -private: - static std::string print_src_status(const VdmaDataPerDirection &data) { - auto max_desc_mask = static_cast((1 << data.depth) - 1); - std::string status = - data.error ? "CHANNEL ERROR" : - !data.start_abort ? "ABORTED" : - data.pause_resume ? "PAUSED" : - (data.num_ongoing & max_desc_mask) != (data.num_processed & max_desc_mask) ? "DURING TRANSFER" : - (data.num_available & max_desc_mask) != (data.num_processed & max_desc_mask) ? "WAITING TO SEND" : - "IDLE"; - return status; - } - - static std::string print_dest_status(const VdmaDataPerDirection &data) { - auto max_desc_mask = static_cast((1 << data.depth) - 1); - std::string status = - data.error ? "CHANNEL ERROR" : - !data.start_abort ? "ABORTED" : - data.pause_resume ? "PAUSED" : - (data.num_ongoing & max_desc_mask) != (data.num_processed & max_desc_mask) ? "DURING TRANSFER" : - (data.num_available & max_desc_mask) != (data.num_processed & max_desc_mask) ? 
"WAITING TO RECEIVE" : - "IDLE"; - return status; - } - - static std::string print_direction(const VdmaDataPerDirection &data) - { - return fmt::format( - "control=({} | {}) id={} depth={:02} num_avail=0x{:04X} num_proc=0x{:04X} num_ongoing=0x{:04X} err=0x{:02X} desc_address=0x{:016X}", - data.start_abort ? "START" : "ABORT", - data.pause_resume ? "PAUSE" : "RESUME", - data.id, - data.depth, - data.num_available, - data.num_processed, - data.num_ongoing, - data.error, - data.desc_address << DESC_ADDRESS_SHIFT); - } - - static constexpr size_t DESC_ADDRESS_SHIFT = 16; -}; - -VdmaMemorySource::VdmaMemorySource(std::shared_ptr driver, MemoryType memory_type) : - DriverMemorySource(std::move(driver), memory_type) -{ - add_field(std::make_shared()); -} - -size_t VdmaMemorySource::total_size() const -{ - return VDMA_CHANNELS_COUNT * sizeof(VdmaChannelData); -} - -DramDmaEngineMemorySource::DramDmaEngineMemorySource(std::shared_ptr driver, MemoryType memory_type) : - DriverMemorySource(std::move(driver), memory_type) -{ - add_field(std::make_shared()); - add_field(std::make_shared()); - add_field(std::make_shared()); - add_field(std::make_shared()); -} \ No newline at end of file diff --git a/hailort/tools/hw_debug/driver_memory.hpp b/hailort/tools/hw_debug/driver_memory.hpp deleted file mode 100644 index 60e6a650..00000000 --- a/hailort/tools/hw_debug/driver_memory.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/** - * @file driver_memory.hpp - * @brief Implements MemorySource over HailoRT driver, reads/write all interfaces. - */ - -#ifndef _HW_DEBUG_DRIVER_MEMORY_HPP_ -#define _HW_DEBUG_DRIVER_MEMORY_HPP_ - -#include "memory_commands.hpp" -#include "os/hailort_driver.hpp" - -using hailort::HailoRTDriver; -using MemoryType = HailoRTDriver::MemoryType; - -class DriverMemorySource : public MemorySource { -public: - DriverMemorySource(std::shared_ptr driver, MemoryType memory_type); - - hailo_status read(uint64_t offset, uint8_t *data, size_t size) override; - hailo_status write(uint64_t offset, const uint8_t *data, size_t size) override; - size_t total_size() const override; - -private: - std::shared_ptr m_driver; - MemoryType m_memory_type; -}; - -class VdmaMemorySource : public DriverMemorySource { -public: - VdmaMemorySource(std::shared_ptr driver, MemoryType memory_type); - size_t total_size() const override; -}; - -class DramDmaEngineMemorySource : public DriverMemorySource { -public: - DramDmaEngineMemorySource(std::shared_ptr driver, MemoryType memory_type); -}; - -#endif /* _HW_DEBUG_DRIVER_MEMORY_HPP_ */ diff --git a/hailort/tools/hw_debug/hailo15_fields.cpp b/hailort/tools/hw_debug/hailo15_fields.cpp deleted file mode 100644 index d4df9dd6..00000000 --- a/hailort/tools/hw_debug/hailo15_fields.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/** - * @file hailo15_fields.cpp - * @brief Contains all memory fields related to hailo15 - */ - -#include "hailo15_fields.hpp" -#include "hw_consts/hailo15/dram_dma_engine_config_regs.h" - -// Implement our own offsetof to allow access to array -#define my_offsetof(type,field) ((size_t)(&(((type*)(0))->field))) -#define dram_dma_offsetof(field) my_offsetof(DRAM_DMA_ENGINE_CONFIG_t, field) - - -static constexpr auto CCB_ADDRESS_SHIFT = 9; - - -QddcField::QddcField() : - Field("qddc", "Queue dest device channel (qddc)") -{} - -size_t QddcField::elements_count() const -{ - return DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH; -} - -std::string QddcField::print_element(MemorySource& memory, size_t index) const -{ - return fmt::format("qddc[{}] enabled={} mode={} shmifo_id={}\n", 
index, - is_enabled(memory, index), mode(memory, index), shmifo_id(memory, index)); -} - -bool QddcField::is_enabled(MemorySource &memory, size_t index) const -{ - return (1 == memory.read(dram_dma_offsetof(QddcEnable[index]))); -} - -uint32_t QddcField::shmifo_id(MemorySource &memory, size_t index) const -{ - return memory.read(dram_dma_offsetof(QddcShmifoId[index])); -} - -std::string QddcField::mode(MemorySource &memory, size_t index) const -{ - const auto mode = memory.read(dram_dma_offsetof(QddcMode[index])); - switch (mode) { - case 0: return "CONTINUOUS"; - case 1: return "BURST"; - default: - return fmt::format("Unknown {}", mode); - } -} - -QsdcField::QsdcField() : - Field("qsdc", "Queue source device channel (qsdc)") -{} - -size_t QsdcField::elements_count() const -{ - return DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH; -} - -std::string QsdcField::print_element(MemorySource& memory, size_t index) const -{ - return fmt::format("qsdc[{}] enabled={} shmifo_id={}\n", index, - is_enabled(memory, index), shmifo_id(memory, index)); -} - -bool QsdcField::is_enabled(MemorySource &memory, size_t index) const -{ - return (1 == memory.read(dram_dma_offsetof(QsdcEnable[index]))); -} - -uint32_t QsdcField::shmifo_id(MemorySource &memory, size_t index) const -{ - return memory.read(dram_dma_offsetof(QsdcShmifoId[index])); -} - -QdmcField::QdmcField() : - Field("qdmc", "Queue dest memory channel (qdmc)") -{} - -size_t QdmcField::elements_count() const -{ - return DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH; -} - -std::string QdmcField::print_element(MemorySource& memory, size_t index) const -{ - return fmt::format("qdmc[{}] enabled={} address=0x{:x} desc_count={} desc_per_irq={}\n", index, - is_enabled(memory, index), base_address(memory, index), descriptors_count(memory, index), - descriptors_per_irq(memory, index)); -} - -bool QdmcField::is_enabled(MemorySource &memory, size_t index) const -{ - return (1 == memory.read(dram_dma_offsetof(QdmcEnable[index]))); -} - -uint64_t QdmcField::base_address(MemorySource &memory, size_t index) const -{ - const uint64_t address = memory.read(dram_dma_offsetof(QdmcMemBaseAddr[index])); - return address << CCB_ADDRESS_SHIFT; -} - -uint32_t QdmcField::descriptors_count(MemorySource &memory, size_t index) const -{ - if (index > DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_REGULAR_CH) { - return memory.read(dram_dma_offsetof(QdmcMemCcbSize[index - DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_REGULAR_CH])); - } - else { - const auto desc_count_log2 = memory.read(dram_dma_offsetof(QdmcMemCcbSizeLog2[index])); - uint32_t size = 1; - for (uint32_t i = 0; i < desc_count_log2; i++) { - size <<= 1; - } - return size; - } -} - -uint32_t QdmcField::descriptors_per_irq(MemorySource &memory, size_t index) const -{ - return memory.read(dram_dma_offsetof(QdmcDescCsInterrupt[index])); -} - -QsmcField::QsmcField() : - Field("qsmc", "Queue source memory channel (qsmc)") -{} - -size_t QsmcField::elements_count() const -{ - return DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH; -} - -std::string QsmcField::print_element(MemorySource& memory, size_t index) const -{ - return fmt::format("qdmc[{}] mode={} enabled={} address=0x{:x} desc_count={}\n", index, - mode(memory, index), is_enabled(memory, index), base_address(memory, index), descriptors_count(memory, index)); -} - -bool QsmcField::is_enabled(MemorySource &memory, size_t index) const -{ - return (1 == memory.read(dram_dma_offsetof(QsmcEnable[index]))); -} - -uint64_t QsmcField::base_address(MemorySource &memory, size_t index) const -{ - const 
uint64_t address = memory.read(dram_dma_offsetof(QsmcMemBaseAddr[index])); - return address << CCB_ADDRESS_SHIFT; -} - -uint32_t QsmcField::descriptors_count(MemorySource &memory, size_t index) const -{ - const auto desc_count = memory.read(dram_dma_offsetof(QsmcMemCcbSize[index])); - return desc_count + 1; // The reg contains desc_count-1 -} - -std::string QsmcField::mode(MemorySource &memory, size_t index) const -{ - const auto mode = memory.read(dram_dma_offsetof(QsmcMode[index])); - switch (mode) { - case 0: return "CONTINUOUS"; - case 2: return "BURST"; - case 3: // C2C mode - { - auto c2c_sel = memory.read(dram_dma_offsetof(QsmcC2cSel[index])); - return fmt::format("C2C (from {})", c2c_sel); - } - default: - return fmt::format("Unknown {}", mode); - } -} diff --git a/hailort/tools/hw_debug/hailo15_fields.hpp b/hailort/tools/hw_debug/hailo15_fields.hpp deleted file mode 100644 index a159f01a..00000000 --- a/hailort/tools/hw_debug/hailo15_fields.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/** - * @file hailo15_fields.hpp - * @brief Contains all memory fields related to hailo15 - */ - -#ifndef _HW_DEBUG_HAILO15_FIELDS_H_ -#define _HW_DEBUG_HAILO15_FIELDS_H_ - -#include "memory_commands.hpp" - - -class QddcField : public Field { -public: - QddcField(); - - virtual size_t elements_count() const override; - virtual std::string print_element(MemorySource& memory, size_t index) const override; - -private: - bool is_enabled(MemorySource &memory, size_t index) const; - uint32_t shmifo_id(MemorySource &memory, size_t index) const; - std::string mode(MemorySource &memory, size_t index) const; -}; - -class QsdcField : public Field { -public: - QsdcField(); - - virtual size_t elements_count() const override; - virtual std::string print_element(MemorySource& memory, size_t index) const override; - -private: - bool is_enabled(MemorySource &memory, size_t index) const; - uint32_t shmifo_id(MemorySource &memory, size_t index) const; -}; - - -class QdmcField : public Field { -public: - QdmcField(); - - virtual size_t elements_count() const override; - virtual std::string print_element(MemorySource& memory, size_t index) const override; - -private: - bool is_enabled(MemorySource &memory, size_t index) const; - uint64_t base_address(MemorySource &memory, size_t index) const; - uint32_t descriptors_count(MemorySource &memory, size_t index) const; - uint32_t descriptors_per_irq(MemorySource &memory, size_t index) const; -}; - -class QsmcField : public Field { -public: - QsmcField(); - - virtual size_t elements_count() const override; - virtual std::string print_element(MemorySource& memory, size_t index) const override; - -private: - bool is_enabled(MemorySource &memory, size_t index) const; - uint64_t base_address(MemorySource &memory, size_t index) const; - uint32_t descriptors_count(MemorySource &memory, size_t index) const; - std::string mode(MemorySource &memory, size_t index) const; -}; - -#endif /* _HW_DEBUG_HAILO15_FIELDS_H_ */ diff --git a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_macros.h b/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_macros.h deleted file mode 100644 index 889f3cca..00000000 --- a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_macros.h +++ /dev/null @@ -1,2270 +0,0 @@ -/*------------------------------------------------------------------------------------- -// Copyright (c) 2022 by Hailotech This model is the confidential and -// proprietary property of Hailotech and the possession or use of this -// file requires a written 
license from Hailotech. --------------------------------------------------------------------------------------*/ - - - -#include - -#ifndef DRAM_DMA_ENGINE_CONFIG_MACRO_H -#define DRAM_DMA_ENGINE_CONFIG_MACRO_H - - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCENABLE : val */ -/* Description: Enable per channel,when disabled do not give credits to vDMA */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCRESET : val */ -/* Description: Soft reset per channel,when write 1'b1 should clear all internal credits/counter/status. Should be set when channel is disabled,usually with vDMA channel reset (abort). Write 1'b0 should do nothing. Read always return 1'b0. Implemented as external register type. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCMODE : val */ -/* Description: 0 - CONT_MODE. 
1 - BURST_MODE */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCADDBURSTVAL : val */ -/* Description: Writing to this register increment the remain burst counter in QDDC by QddcAddBurstVal x 8 Bytes: RemainBurstCount += QddcAddBurstVal. Reading this register should return the current available credit counter (RemainBurstCount) in 2s complement format - can be negative. Implemented as external register type. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__WIDTH (27) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__MASK (0x07FFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x07FFFFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x07FFFFFFL) | (((uint32_t)(value) << 0) & 0x07FFFFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x07FFFFFFL) | 0x07FFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x07FFFFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCMAXDESC : val */ -/* Description: Maximum in flight descriptors,this is a TH for number of descriptors the QM might give the vDMA. 3'd0 - 1 descriptor (debug mode). 3'd1 - N_QM_DESC*1/8 (2). 3'd2 - N_QM_DESC*2/8 (4). 3'd3 - N_QM_DESC*3/8 (6). 3'd4 - N_QM_DESC*2/4 (8). 3'd5 - N_QM_DESC*5/8 (10). 3'd6 - N_QM_DESC*6/8 (12). 3'd7 - N_QM_DESC-1 (15-maximum),default. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__WIDTH (3) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__MASK (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__RESET (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000007L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000007L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCSHMIFOID : val */ -/* Description: The RX-SHMIFO ID. Used to know the SHMIFO base address (from a global parameter/define) and used to select the correct SHMIFO credit signal (nn_core_inbound_buffer_ready_pulse). 0-19: for DSM-RX 0-19. 20-23: for CSM 0-3. 24-30: reserved. 31: NULL ignore any credit from NN Core. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__WIDTH (5) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__MASK (0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__RESET (0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000001FL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL) | (((uint32_t)(value) << 0) & 0x0000001FL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL) | 0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCSHMIFOCREDITSIZE : val */ -/* Description: The credit size in 8B granularity minus 1. 0 - indicates 8B 1 - indicates 16B ... 10'd1023 - indicates 8kB */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__WIDTH (10) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__MASK (0x000003FFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000003FFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL) | (((uint32_t)(value) << 0) & 0x000003FFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL) | 0x000003FFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCSHMIFOINITCREDIT : val */ -/* Description: Writing to this register set the amount of credit from SHMIFO RX (AvailableCredits),used to configure the initial amount of credits,reading this register should return the value of AvailableCredits. 
Implemented as external register type. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__WIDTH (13) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__MASK (0x00001FFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00001FFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00001FFFL) | (((uint32_t)(value) << 0) & 0x00001FFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00001FFFL) | 0x00001FFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00001FFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCENABLE : val */ -/* Description: Enable per channel,when disabled do not give credits to vDMA */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCRESET : val */ -/* Description: Soft reset per channel,when write 1'b1 should clear all internal credits/counter/status. Should be set when channel is disabled,usually with vDMA channel reset (abort). Write 1'b0 should do nothing. Read always return 1'b0. Implemented as external register type. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCMAXDESC : val */ -/* Description: Maximum in flight descriptors,this is a TH for number of descriptors the QM might give the vDMA. 3'd0 - 1 descriptor (debug mode). 3'd1 - N_QM_DESC*1/8 (2). 3'd2 - N_QM_DESC*2/8 (4). 3'd3 - N_QM_DESC*3/8 (6). 
3'd4 - N_QM_DESC*4/8 (8). 3'd5 - N_QM_DESC*5/8 (10). 3'd6 - N_QM_DESC*6/8 (12). 3'd7 - N_QM_DESC-1 (15-maximum),default. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__WIDTH (3) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__MASK (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__RESET (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000007L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000007L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCSHMIFOID : val */ -/* Description: The TX-SHMIFO ID. Used to know the SHMIFO base address (from a global parameter/define) and used to select the correct SHMIFO credit signal (nn_core_outbound_buffer_valid_pulse). 0-19: for DSM-TX 0-19. 20-30: reserved. 31: NULL ignore any credit from NN Core. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__WIDTH (5) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__MASK (0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__RESET (0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000001FL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL) | (((uint32_t)(value) << 0) & 0x0000001FL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL) | 0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCSHMIFOCREDITSIZE : val */ -/* Description: The credit size in 8B granularity minus 1. 0 - indicates 8B 1 - indicates 16B ... 10'd1023 - indicates 8kB */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__WIDTH (10) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__MASK (0x000003FFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000003FFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL) | (((uint32_t)(value) << 0) & 0x000003FFL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL) | 0x000003FFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCFULLNUMPATTERNS : val */ -/* Description: Number of patterns per pattern ID minus one. 0 - one pattern,1 - two patterns,...,3 - four patterns. 
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLNUMPATTERNS : val */
-/* Description: Number of patterns per pattern ID minus one. 0 - one pattern,1 - two patterns,...,3 - four patterns. */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLPATTERNNUMLINES : val */
-/* Description: Number of lines per pattern. */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLPATTERNNUMPAGES : val */
-/* Description: Number of pages per line. */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLPATTERNPAGESIZE : val */
-/* Description: page size in 8B granularity,minus one,per pattern. 0-8B,1-16B,...,511-4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__WIDTH (9)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__MASK (0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000001FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | (((uint32_t)(value) << 0) & 0x000001FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | 0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLPATTERNRESIDUEPAGESIZE : val */
-/* Description: Residue page size in 8B granularity,minus one,per pattern. 0-8B,1-16B,...,511-4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__WIDTH (9)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__MASK (0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000001FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | (((uint32_t)(value) << 0) & 0x000001FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | 0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCSIMPPATTERNNUMPAGES : val */
-/* Description: Number of pages per line (simplified pattern has single line/pattern). */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCSIMPPATTERNPAGESIZE : val */
-/* Description: Log2(Page size/512B),valid values are 0 to PAGE_SIZE_MAX-10. 0 - 512B,1 - 1kB,2 - 2kB,3 - 4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCSIMPPATTERNRESIDUEPAGESIZE : val */
-/* Description: Residue page size in 8B granularity,minus one,per pattern. 0-8B,1-16B,...,511-4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__WIDTH (9)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__MASK (0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000001FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | (((uint32_t)(value) << 0) & 0x000001FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | 0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL))
-
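The pattern-size fields above use two encodings: "minus one, in 8B granularity" for page and residue sizes, and Log2(size/512B) for the simplified-pattern page size. A sketch of the corresponding conversions, assuming byte counts that already satisfy the stated ranges:

#include <assert.h>
#include <stdint.h>

/* 8B-granularity-minus-one encoding: 8B -> 0, 16B -> 1, ..., 4kB -> 511. */
static uint32_t size_to_8b_minus1_field(uint32_t bytes)
{
    assert(bytes >= 8 && bytes <= 4096 && (bytes % 8) == 0);
    return (bytes / 8) - 1;
}

/* Log2(page/512B) encoding: 512B -> 0, 1kB -> 1, 2kB -> 2, 4kB -> 3. */
static uint32_t page_size_to_log2_field(uint32_t bytes)
{
    uint32_t field = 0;
    assert(bytes >= 512 && (bytes & (bytes - 1)) == 0);
    while ((512u << field) < bytes) {
        field++;
    }
    return field;
}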
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCENABLE : val */
-/* Description: Enable per channel,when disabled do not give credits to vDMA */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCRESET : val */
-/* Description: Soft reset per channel,when write 1'b1 should clear all internal credits/counter/status. Should be set when channel is disabled,usually with vDMA channel reset (abort). Write 1'b0 should do nothing. Read always return 1'b0. Implemented as external register type. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCMEMBASEADDR : val */
-/* Description: Base address to the CCB in the DDR memory space. aligned to minimum page size of 512B. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__WIDTH (26)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__MASK (0x03FFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x03FFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL) | (((uint32_t)(value) << 0) & 0x03FFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL) | 0x03FFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCMEMCCBSIZELOG2 : val */
-/* Description: The CCB size Log2(memory size/512B): 1 - 1kB (2 pages). 2 - 2kB. valid values are 1 to W_CCB_DESC_INDEX */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__WIDTH (5)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__MASK (0x0000001FL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0000001FL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0000001FL) | (((uint32_t)(value) << 0) & 0x0000001FL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0000001FL) | 0x0000001FL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0000001FL))
-
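The two CCB fields above pair naturally: a 512B-aligned base plus a Log2(size/512B) size. A sketch of deriving both field values from a CCB allocation; expressing the 26-bit base in 512B units is an assumption inferred from the alignment note, not something the header states:

#include <assert.h>
#include <stdint.h>

/* Hypothetical conversion from a CCB allocation to the QDMC field values. */
static void qdmc_ccb_fields(uint64_t ccb_dma_addr, uint32_t ccb_size_bytes,
                            uint32_t *base_field, uint32_t *size_log2_field)
{
    uint32_t log2 = 0;

    assert((ccb_dma_addr % 512) == 0);           /* aligned to minimum page size */
    *base_field = (uint32_t)(ccb_dma_addr >> 9); /* assumed: base counted in 512B units */

    while ((512u << log2) < ccb_size_bytes) {    /* 1 -> 1kB (2 pages), 2 -> 2kB, ... */
        log2++;
    }
    *size_log2_field = log2;
}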
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCDESCCSINTERRUPT : val */
-/* Description: When > 0 the QDMC will interrupt the CS manager every written QdmcDescCsInterrupt descriptors. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCBANKINTERLEAVEMODE : val */
-/* Description: Select the bank interleave mode: 2'd0 - interleave 8 banks (default),2'd1 - Interleave 4 banks,2'd2 - Interleave 2 banks,2'd3 - no interleave. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCMODE : val */
-/* Description: 0 - CONT_MODE. 1 - BURST_MODE */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCADDBURSTVAL : val */
-/* Description: Writing to this register increment the available descriptor counter in QDMC by QdmcAddBurstVal descriptors: AvailableDescsCounter += QdmcAddBurstVal. Reading this register should return the current available descriptors counter (AvailableDescsCounter). Implemented as external register type. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
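Per the description above, QDMCADDBURSTVAL behaves as a write-to-add, read-current external register. A sketch of the implied burst-mode credit handoff; the MMIO pointer is a placeholder:

#include <stdint.h>

/* Hand num_descs descriptors to the channel and return the live counter:
 * the write performs AvailableDescsCounter += num_descs, the read returns it. */
static uint32_t qdmc_add_descs(volatile uint32_t *qdmc_add_burst_val_reg,
                               uint32_t num_descs)
{
    *qdmc_add_burst_val_reg = num_descs;
    return *qdmc_add_burst_val_reg;
}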
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCMEMCCBSIZE : val */
-/* Description: The CCB size Log2(memory size/512B): 1 - 1kB (2 pages). 2 - 2kB. valid values are 1 to W_CCB_DESC_INDEX */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCDESCPERIPHINTERRUPT : val */
-/* Description: When > 0 the QDMC will interrupt the peripheral every written QdmcDescPeriphInterrupt descriptors. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCCCBPROCESSEDINDEX : val */
-/* Description: Used by the peripheral to indicates how many data is ready in the CCB (process). This is the CcbIndex (free pointer in CCB). */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCENABLE : val */
-/* Description: Enable per channel,when disabled do not give credits to vDMA */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCRESET : val */
-/* Description: Soft reset per channel,when write 1'b1 should clear all internal credits/counter/status. Should be set when channel is disabled,usually with vDMA channel reset (abort). Write 1'b0 should do nothing. Read always return 1'b0. Implemented as external register type. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCMODE : val */
-/* Description: QSMC mode of operation: 2'd0 - CONT_MODE 2'd1 - reserved. 2'd2 - BURST_MODE 2'd3 - C2C_MODE */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCC2CSEL : val */
-/* Description: Selector for Channel-to-Channel credit input,selects QDMC channel as source for HW available descriptors */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__WIDTH (6)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__MASK (0x0000003FL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__RESET (0x0000003FL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0000003FL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0000003FL) | (((uint32_t)(value) << 0) & 0x0000003FL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0000003FL) | 0x0000003FL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0000003FL))
-
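QSMCMODE's 2'd3 (C2C_MODE) works together with QSMCC2CSEL, which picks the QDMC channel that sources hardware descriptor credits. A sketch of wiring one M2D channel to a D2M peer, assuming the field macros are in scope; the register pointers and the QSMC_MODE_C2C name are placeholders:

#include <stdint.h>

#define QSMC_MODE_C2C (3u)  /* 2'd3 - C2C_MODE, per the description above */

static void qsmc_setup_c2c(volatile uint32_t *qsmc_mode_reg,
                           volatile uint32_t *qsmc_c2c_sel_reg,
                           uint32_t src_qdmc_channel)
{
    uint32_t mode = *qsmc_mode_reg;
    uint32_t sel = *qsmc_c2c_sel_reg;

    DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__MODIFY(mode, QSMC_MODE_C2C);
    DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__MODIFY(sel, src_qdmc_channel);

    *qsmc_mode_reg = mode;
    *qsmc_c2c_sel_reg = sel;
}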
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCADDBURSTVAL : val */
-/* Description: Writing to this register increment the available descriptor counter in QSMC by QsmcAddBurstVal descriptors: AvailableDescsCounter += QsmcAddBurstVal. Reading this register should return the current available descriptors counter (AvailableDescsCounter). Implemented as external register type. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCMEMBASEADDR : val */
-/* Description: Base address to the CCB in the DDR memory space. aligned to minimum page size of 512B. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__WIDTH (26)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__MASK (0x03FFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x03FFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL) | (((uint32_t)(value) << 0) & 0x03FFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL) | 0x03FFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCMEMCCBSIZE : val */
-/* Description: The CCB size minus one in page size granularity. 0 - 1 desc 1 - 2 desc ... N_CCB_MAX_DESC-1 - N_CCB_MAX_DESC desc. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCPAGESIZE : val */
-/* Description: M2D Memory page size. Valid values are: 0 - 512B,1 - 1KB,2 - 2KB,3 - 4KB,4 - 1536B. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCSIMPPATTERNNUMPAGES : val */
-/* Description: Number of pages per line (simplified pattern has single line/pattern). */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCSIMPPATTERNRESIDUEPAGESIZE : val */
-/* Description: Residue page size in 8B granularity,minus one,per pattern. 0-8B,1-16B,...,511-4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__WIDTH (9)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__MASK (0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000001FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | (((uint32_t)(value) << 0) & 0x000001FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | 0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCBANKINTERLEAVEMODE : val */
-/* Description: Select the bank interleave mode: 2'd0 - interleave 8 banks (default),2'd1 - Interleave 4 banks,2'd2 - Interleave 2 banks,2'd3 - no interleave. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCDESCPERIPHINTERRUPT : val */
-/* Description: When > 0 the QSMC will interrupt the peripheral every read QsmcDescPeriphInterrupt descriptors. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCCCBFREEINDEX : val */
-/* Description: Used by the peripheral to indicates how many data is ready in the CCB for write (process). This is the CcbIndex (free pointer in CCB). */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_CS_INTR_MASK : val */
-/* Description: INT register bits[15:0] per M2D channel,indicating one of the following events: a. Internal desc - QSMC processed last CCB descriptor. Implemented by set the interrupt when CCB-free-index is wrapped (become zero),might be used for CONF channel - to indicates conf is done. bits[31:16] per D2M channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescCsInterrupt (OR) External desc - domain#0 (local) source/destination event. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_CS_INTR_STATUS : val */
-/* Description: INT register bits[15:0] per M2D channel,indicating one of the following events: a. Internal desc - QSMC processed last CCB descriptor. Implemented by set the interrupt when CCB-free-index is wrapped (become zero),might be used for CONF channel - to indicates conf is done. bits[31:16] per D2M channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescCsInterrupt (OR) External desc - domain#0 (local) source/destination event. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-
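The CS interrupt registers pack M2D channels into bits [15:0] and D2M channels into bits [31:16]. A small sketch of building a per-channel bit under that layout; the 16-channels-per-direction bound is an assumption read off the bit split:

#include <stdint.h>

/* Bit position for a channel in the ENGINE_CS_INTR_* registers. */
static uint32_t engine_cs_intr_bit(unsigned channel, int is_d2m)
{
    return 1u << (is_d2m ? (16u + channel) : channel);  /* channel < 16 assumed */
}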
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_CS_INTR_W1C : val */
-/* Description: INT register bits[15:0] per M2D channel,indicating one of the following events: a. Internal desc - QSMC processed last CCB descriptor. Implemented by set the interrupt when CCB-free-index is wrapped (become zero),might be used for CONF channel - to indicates conf is done. bits[31:16] per D2M channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescCsInterrupt (OR) External desc - domain#0 (local) source/destination event. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_CS_INTR_W1S : val */
-/* Description: INT register bits[15:0] per M2D channel,indicating one of the following events: a. Internal desc - QSMC processed last CCB descriptor. Implemented by set the interrupt when CCB-free-index is wrapped (become zero),might be used for CONF channel - to indicates conf is done. bits[31:16] per D2M channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescCsInterrupt (OR) External desc - domain#0 (local) source/destination event. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
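The MASK/STATUS/W1C/W1S quartet above implies the usual acknowledge discipline: read the latched status, handle it, then write the same bits to the W1C register to clear them. A sketch with placeholder register pointers:

#include <stdint.h>

/* Read-and-acknowledge sketch for the engine CS interrupt. */
static uint32_t engine_cs_take_status(volatile uint32_t *cs_status_reg,
                                      volatile uint32_t *cs_w1c_reg)
{
    uint32_t status = *cs_status_reg;  /* latched per-channel events */
    *cs_w1c_reg = status;              /* write-1-to-clear what we observed */
    return status;
}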
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_AP_INTR_MASK : val */
-/* Description: INT register bit per direction/channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescPeriphInterrupt (D2M enhanced channels only) (OR) Internal desc - QSMC processed descriptors per QsmcDescPeriphInterrupt (M2D enhanced channels only) (OR) External desc - domain#1 (host) source/destination event */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_AP_INTR_STATUS : val */
-/* Description: INT register bit per direction/channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescPeriphInterrupt (D2M enhanced channels only) (OR) Internal desc - QSMC processed descriptors per QsmcDescPeriphInterrupt (M2D enhanced channels only) (OR) External desc - domain#1 (host) source/destination event */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_AP_INTR_W1C : val */
-/* Description: INT register bit per direction/channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescPeriphInterrupt (D2M enhanced channels only) (OR) Internal desc - QSMC processed descriptors per QsmcDescPeriphInterrupt (M2D enhanced channels only) (OR) External desc - domain#1 (host) source/destination event */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_AP_INTR_W1S : val */
-/* Description: INT register bit per direction/channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescPeriphInterrupt (D2M enhanced channels only) (OR) Internal desc - QSMC processed descriptors per QsmcDescPeriphInterrupt (M2D enhanced channels only) (OR) External desc - domain#1 (host) source/destination event */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_DSP_INTR_MASK : val */
-/* Description: INT register */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_DSP_INTR_STATUS : val */
-/* Description: INT register */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_DSP_INTR_W1C : val */
-/* Description: INT register */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_DSP_INTR_W1S : val */
-/* Description: INT register */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ERR_INTR_MASK : desc_err */
-/* Description: Summary of desc_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/* ENGINE_ERR_INTR_MASK : qddc_crd_ovf_err */
-/* Description: Summary of qddc_crd_ovf_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__SHIFT (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__MASK (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000002L) >> 1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1))
-
-/* ENGINE_ERR_INTR_MASK : qsdc_crd_ovf_err */
-/* Description: Summary of qsdc_crd_ovf_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__SHIFT (2)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__MASK (0x00000004L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000004L) >> 2)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000004L) | (((uint32_t)(value) << 2) & 0x00000004L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000004L) | ((uint32_t)(1) << 2))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000004L) | ((uint32_t)(0) << 2))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ERR_INTR_STATUS : desc_err */
-/* Description: Summary of desc_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-
-/* ENGINE_ERR_INTR_STATUS : qddc_crd_ovf_err */
-/* Description: Summary of qddc_crd_ovf_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__SHIFT (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__MASK (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000002L) >> 1)
-
-/* ENGINE_ERR_INTR_STATUS : qsdc_crd_ovf_err */
-/* Description: Summary of qsdc_crd_ovf_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__SHIFT (2)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__MASK (0x00000004L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000004L) >> 2)
-
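ENGINE_ERR_INTR_STATUS is a summary register: each bit says which detail register (desc_err_intr, qddc_crd_ovf_err_intr, qsdc_crd_ovf_err_intr) latched an error. A fan-out sketch using the READ macros above; the pointer and the handling stubs are placeholders:

#include <stdint.h>

static void engine_err_fanout(volatile uint32_t *err_status_reg)
{
    uint32_t status = *err_status_reg;

    if (DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__READ(status)) {
        /* consult DESC_ERR_INTR_STATUS for the failing DESC_STATUS bits */
    }
    if (DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__READ(status)) {
        /* consult qddc_crd_ovf_err_intr for the offending QDDC channel */
    }
    if (DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__READ(status)) {
        /* consult qsdc_crd_ovf_err_intr for the offending QSDC channel */
    }
}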
-/*----------------------------------------------------------------------------------------------------*/
-/* DESC_ERR_INTR_MASK : DescStatus */
-/* Description: Interrupt bit per DESC_STATUS fields of vDMA descriptor which returned unexpected value (Note that successful descriptor returns status of 8'h1). Refer to EngErrInterruptSource register for the error origin. */
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/* DESC_ERR_INTR_MASK : RemainPageSize */
-/* Description: non-zero REMAINING_PAGE_SIZE. Refer to EngErrInterruptSource register for the error origin. Refer to EngErrRemainPageSize register for the returned value. */
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__SHIFT (8)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__MASK (0x00000100L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000100L) >> 8)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000100L) | (((uint32_t)(value) << 8) & 0x00000100L))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000100L) | ((uint32_t)(1) << 8))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000100L) | ((uint32_t)(0) << 8))
-
-/* DESC_ERR_INTR_MASK : SrcDescWdataPar */
-/* Description: Source descriptor complete with error status. Refer to EngErrInterruptSource register for the error origin. */
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__SHIFT (9)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__MASK (0x00000200L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000200L) >> 9)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000200L) | (((uint32_t)(value) << 9) & 0x00000200L))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000200L) | ((uint32_t)(1) << 9))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000200L) | ((uint32_t)(0) << 9))
-
-/* DESC_ERR_INTR_MASK : DstDescWdataPar */
-/* Description: Destination descriptor complete with error status. Refer to EngErrInterruptSource register for the error origin. */
*/ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__SHIFT (10) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__MASK (0x00000400L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000400L) >> 10) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000400L) | (((uint32_t)(value) << 10) & 0x00000400L)) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000400L) | ((uint32_t)(1) << 10)) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000400L) | ((uint32_t)(0) << 10)) - -/*----------------------------------------------------------------------------------------------------*/ -/* DESC_ERR_INTR_STATUS : DescStatus */ -/* Description: Interrupt bit per DESC_STATUS fields of vDMA descriptor which returned unexpected value (Note that successful descriptor returns status of 8'h1). Refer to EngErrInterruptSource register for the error origin. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__WIDTH (8) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__MASK (0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000000FFL) >> 0) - -/* DESC_ERR_INTR_STATUS : RemainPageSize */ -/* Description: non-zero REMAINING_PAGE_SIZE. Refer to EngErrInterruptSource register for the error origin. Refer to EngErrRemainPageSize register for the returned value. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__SHIFT (8) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__MASK (0x00000100L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000100L) >> 8) - -/* DESC_ERR_INTR_STATUS : SrcDescWdataPar */ -/* Description: Source descriptor complete with error status. Refer to EngErrInterruptSource register for the error origin. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__SHIFT (9) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__MASK (0x00000200L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000200L) >> 9) - -/* DESC_ERR_INTR_STATUS : DstDescWdataPar */ -/* Description: Destination descriptor complete with error status. Refer to EngErrInterruptSource register for the error origin. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__SHIFT (10) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__MASK (0x00000400L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000400L) >> 10) - -/*----------------------------------------------------------------------------------------------------*/ -/* DESC_ERR_INTR_W1C : DescStatus */ -/* Description: Interrupt bit per DESC_STATUS fields of vDMA descriptor which returned unexpected value (Note that successful descriptor returns status of 8'h1). Refer to EngErrInterruptSource register for the error origin. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__WIDTH (8) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__MASK (0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000000FFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL)) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* DESC_ERR_INTR_W1S : DescStatus */ -/* Description: Interrupt bit per DESC_STATUS fields of vDMA descriptor which returned unexpected value (Note that successful descriptor returns status of 8'h1). Refer to EngErrInterruptSource register for the error origin. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__WIDTH (8) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__MASK (0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000000FFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL)) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDC_CRD_OVF_ERR_INTR_MASK : ch */ -/* Description: Interrupt bit per QDDC channel indicating overflow or underflow in Core credit counter. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDC_CRD_OVF_ERR_INTR_STATUS : ch */ -/* Description: Interrupt bit per QDDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDC_CRD_OVF_ERR_INTR_W1C : ch */ -/* Description: Interrupt bit per QDDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDC_CRD_OVF_ERR_INTR_W1S : ch */ -/* Description: Interrupt bit per QDDC channel indicating overflow or underflow in Core credit counter. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDC_CRD_OVF_ERR_INTR_MASK : ch */ -/* Description: Interrupt bit per QSDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDC_CRD_OVF_ERR_INTR_STATUS : ch */ -/* Description: Interrupt bit per QSDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDC_CRD_OVF_ERR_INTR_W1C : ch */ -/* Description: Interrupt bit per QSDC channel indicating overflow or underflow in Core credit counter. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDC_CRD_OVF_ERR_INTR_W1S : ch */ -/* Description: Interrupt bit per QSDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* ENGERRINTERRUPTSOURCE : ChannelID */ -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__WIDTH (4) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__MASK (0x0000000FL) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000000FL) >> 0) - -/* ENGERRINTERRUPTSOURCE : Direction */ -/* Description: 0 - Destination. 1 - Source. */ -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__SHIFT (4) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__MASK (0x00000010L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000010L) >> 4) - -/* ENGERRINTERRUPTSOURCE : Domain */ -/* Description: 0 - Device. 1 - Memory. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__SHIFT (5) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__MASK (0x00000020L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000020L) >> 5) - -/*----------------------------------------------------------------------------------------------------*/ -/* ENGERRREMAINPAGESIZE : val */ -/* Description: In case of non-zero REMAINING_PAGE_SIZE this register holds the latched value until cleared by writing to this register */ -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__WIDTH (24) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__MASK (0x00FFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00FFFFFFL) >> 0) - -/*----------------------------------------------------------------------------------------------------*/ -/* ENGTRANSFERPAGESIZE : size */ -/* Description: TRANSFERRED_PAGE_SIZE value of last descriptor write to QDMC */ -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__WIDTH (24) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__MASK (0x00FFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00FFFFFFL) >> 0) - -/* ENGTRANSFERPAGESIZE : ch_id */ -/* Description: QDMC Channel ID */ -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__SHIFT (24) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__WIDTH (4) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__MASK (0x0F000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0F000000L) >> 24) - -/*----------------------------------------------------------------------------------------------------*/ -/* VDMASOFTRESET : val */ -/* Description: Apply soft reset to vDMA. Must be cleared in order to release vDMA from soft reset. 
*/
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/* VDMASOFTRESET : par */
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__SHIFT (31)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__MASK (0x80000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x80000000L) >> 31)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | (((uint32_t)(value) << 31) & 0x80000000L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | ((uint32_t)(1) << 31))
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | ((uint32_t)(0) << 31))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* VDMA_SHAREDBUS : cs_mask */
-/* Description: Bit mask on vDMA Sharedbus interrupt source for CS */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__WIDTH (4)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__MASK (0x0000000FL)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__RESET (0x0000000AL)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000000FL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL) | (((uint32_t)(value) << 0) & 0x0000000FL))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL) | 0x0000000FL)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL))
-
-/* VDMA_SHAREDBUS : ap_mask */
-/* Description: Bit mask on vDMA Sharedbus interrupt source for AP */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__SHIFT (4)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__WIDTH (4)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__MASK (0x000000F0L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__RESET (0x00000050L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000F0L) >> 4)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000F0L) | (((uint32_t)(value) << 4) & 0x000000F0L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000F0L) | 0x000000F0L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000F0L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* CFG_QDDC_REDUNDANT_EN : val */
-/* Description: Redundancy mode enable bit per QM pair. Bit i makes QM[i*2+1] a redundancy for QM[i*2] */
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* CFG_QSDC_REDUNDANT_EN : val */
-/* Description: Redundancy mode enable bit per QM pair. Bit i makes QM[i*2+1] a redundancy for QM[i*2] */
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* CFG_QDMC_REDUNDANT_EN : val */
-/* Description: Redundancy mode enable bit per QM pair. Bit i makes QM[i*2+1] a redundancy for QM[i*2] */
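Taken together, these blocks follow one access pattern: each field gets SHIFT/WIDTH/MASK/RESET constants plus READ/MODIFY/SET/CLR macros that operate on a register value held in a local variable (the parameter is named reg_offset, but it is the value, not an address), so every register update is a read-modify-write through the platform's MMIO layer. A minimal usage sketch for the soft-reset sequence described above; mmio_read32/mmio_write32 and VDMASOFTRESET_OFFSET are hypothetical stand-ins for helpers this diff does not show:

#include <stdint.h>

/* Hypothetical register-access helpers; not part of this header. */
extern uint32_t mmio_read32(uintptr_t addr);
extern void mmio_write32(uintptr_t addr, uint32_t value);

static void vdma_soft_reset_pulse(uintptr_t engine_base)
{
    uint32_t reg = mmio_read32(engine_base + VDMASOFTRESET_OFFSET); /* hypothetical offset */
    DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__SET(reg);  /* assert soft reset */
    mmio_write32(engine_base + VDMASOFTRESET_OFFSET, reg);
    DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__CLR(reg);  /* must be cleared to release the vDMA */
    mmio_write32(engine_base + VDMASOFTRESET_OFFSET, reg);
}

The same read-modify-write shape applies to every register in this file; only the field macros change. The sketches below reuse the same hypothetical helpers.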
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* CFG_QSMC_REDUNDANT_EN : val */
-/* Description: Redundancy mode enable bit per QM pair. Bit i makes QM[i*2+1] a redundancy for QM[i*2] */
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_REDUNDANT_ASF_INT_MASK : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_REDUNDANT_ASF_INT_STATUS : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_REDUNDANT_ASF_INT_W1C : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_REDUNDANT_ASF_INT_W1S : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_REDUNDANT_ASF_INT_MASK : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_REDUNDANT_ASF_INT_STATUS : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_REDUNDANT_ASF_INT_W1C : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_REDUNDANT_ASF_INT_W1S : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
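The *_REDUNDANT_ASF_INT_* registers come in MASK/STATUS/W1C/W1S banks: STATUS is a read-only snapshot, W1C acknowledges by writing ones, and W1S can raise the bits for test purposes. A sketch of servicing a QDDC mismatch interrupt, under the same hypothetical helpers and a hypothetical register offset:

static void qddc_redundant_mismatch_isr(uintptr_t engine_base)
{
    uint32_t status = mmio_read32(engine_base + QDDC_REDUNDANT_ASF_INT_STATUS_OFFSET);
    uint32_t pending = DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__READ(status);
    if (pending != 0) {
        uint32_t w1c = 0;
        /* write-1-to-clear: only the bits written as 1 are acknowledged */
        DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(w1c, pending);
        mmio_write32(engine_base + QDDC_REDUNDANT_ASF_INT_W1C_OFFSET, w1c);
    }
}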
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_REDUNDANT_ASF_INT_MASK : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_REDUNDANT_ASF_INT_STATUS : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_REDUNDANT_ASF_INT_W1C : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_REDUNDANT_ASF_INT_W1S : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_REDUNDANT_ASF_INT_MASK : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_REDUNDANT_ASF_INT_STATUS : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_REDUNDANT_ASF_INT_W1C : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_REDUNDANT_ASF_INT_W1S : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* PRIOISLP : val */
-/* Description: Indicates the channel priority is low. */
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* READLPTOQOSVALUE : val */
-/* Description: The QOS toward DDR-AXI master for low priority read. */
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* READHPTOQOSVALUE : val */
-/* Description: The QOS toward DDR-AXI master for high priority read. */
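PRIOISLP is a plain 32-bit bitmap, one bit per channel, with 1 marking the channel as low priority. A sketch, using the same hypothetical helpers and a hypothetical PRIOISLP_OFFSET, that demotes a single channel:

static void channel_set_low_priority(uintptr_t engine_base, unsigned channel)
{
    uint32_t reg = mmio_read32(engine_base + PRIOISLP_OFFSET);
    uint32_t bitmap = DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__READ(reg);
    DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__MODIFY(reg, bitmap | (1u << channel));
    mmio_write32(engine_base + PRIOISLP_OFFSET, reg);
}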
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* WRITELPTOQOSVALUE : val */
-/* Description: The QOS toward DDR-AXI master for low priority write. */
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* WRITEHPTOQOSVALUE : val */
-/* Description: The QOS toward DDR-AXI master for high priority write. */
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* DESCREADQOSVALUE : val */
-/* Description: The QOS toward DDR-desc-AXI master for read. */
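The *TOQOSVALUE fields are 3-bit AXI QOS codes; the LP variants reset to 0 and the HP variants to 2. A sketch (same hypothetical helpers; the value 4 and WRITEHPTOQOSVALUE_OFFSET are illustrative assumptions) that raises the high-priority write QOS:

static void raise_hp_write_qos(uintptr_t engine_base)
{
    uint32_t reg = mmio_read32(engine_base + WRITEHPTOQOSVALUE_OFFSET);
    DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__MODIFY(reg, 0x4); /* any 3-bit QOS code */
    mmio_write32(engine_base + WRITEHPTOQOSVALUE_OFFSET, reg);
}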
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* DESCWRITEQOSVALUE : val */
-/* Description: The QOS toward DDR-desc-AXI master for write. */
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* VDMA_ARB : prio_en */
-/* Description: Enable 2-level priority-based channel arbitration in vDMA */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__RESET (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/* VDMA_ARB : interleave_en */
-/* Description: Enable interleaving of the arbitration order between M2D and D2M channels */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__SHIFT (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__MASK (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000002L) >> 1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1))
-
-/* VDMA_ARB : par */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__SHIFT (31)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__MASK (0x80000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x80000000L) >> 31)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | (((uint32_t)(value) << 31) & 0x80000000L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | ((uint32_t)(1) << 31))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | ((uint32_t)(0) << 31))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QM_CFG_CG_DELAY : val */
-/* Description: Clock cycles to keep the clock running after the enable condition is met */
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__WIDTH (4)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__MASK (0x0000000FL)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__RESET (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000000FL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL) | (((uint32_t)(value) << 0) & 0x0000000FL))
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL) | 0x0000000FL)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_CFG_CG_BYPASS : val */
-/* Description: Bypass QDDC CG */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__WIDTH (16)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__MASK (0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_CFG_CG_BYPASS : val */
-/* Description: Bypass QSDC CG */
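VDMA_ARB packs two independent arbitration enables, both set out of reset. A sketch (same hypothetical helpers; VDMA_ARB_OFFSET is assumed) that keeps priority arbitration but turns off M2D/D2M interleaving; maintenance of the par bit at 31 is deliberately omitted here:

static void vdma_arb_priority_only(uintptr_t engine_base)
{
    uint32_t arb = mmio_read32(engine_base + VDMA_ARB_OFFSET);
    DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__SET(arb);       /* keep 2-level priority arbitration */
    DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__CLR(arb); /* stop interleaving M2D and D2M */
    mmio_write32(engine_base + VDMA_ARB_OFFSET, arb);
}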
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__WIDTH (16)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__MASK (0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_CFG_CG_BYPASS : val */
-/* Description: Bypass QDMC CG */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__WIDTH (16)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__MASK (0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_CFG_CG_BYPASS : val */
-/* Description: Bypass QSMC CG */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__WIDTH (16)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__MASK (0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ASF_INT_MASK : parity_error_in_regfile */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ASF_INT_STATUS : parity_error_in_regfile */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ASF_INT_W1C : parity_error_in_regfile */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ASF_INT_W1S : parity_error_in_regfile */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
(reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* ENGINE_RW_PARITY_BIST_MODE : val */ -/* Description: write 1 if want to work in rw_parity bist mode in which the parity bit is written by APB wdata and not from HW calculation */ -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* VDMA_STOP_LP : dis */ -/* Description: Write 1 if want to disable LP Stop feature */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/* VDMA_STOP_LP : force_val */ -/* Description: Force Stop LP state when feature is enabled */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__SHIFT (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__MASK (0x00000002L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000002L) >> 1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1)) - 
-/*----------------------------------------------------------------------------------------------------*/ -/* VDMA_SCH : stop_th */ -/* Description: Stop scheduling for this many cycles after each successful allocation */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__WIDTH (7) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__MASK (0x0000007FL) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__RESET (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000007FL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000007FL) | (((uint32_t)(value) << 0) & 0x0000007FL)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000007FL) | 0x0000007FL) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000007FL)) - -/* VDMA_SCH : stop_en */ -/* Description: Enable periodic scheduling stopping mechanism */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__SHIFT (7) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__MASK (0x00000080L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__RESET (0x00000080L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000080L) >> 7) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000080L) | (((uint32_t)(value) << 7) & 0x00000080L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000080L) | ((uint32_t)(1) << 7)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000080L) | ((uint32_t)(0) << 7)) - -/* VDMA_SCH : tsf24_mode */ -/* Description: Apply fix to increase maximum transfers to 24 */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__SHIFT (8) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__MASK (0x00000100L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000100L) >> 8) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000100L) | (((uint32_t)(value) << 8) & 0x00000100L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000100L) | ((uint32_t)(1) << 8)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000100L) | ((uint32_t)(0) << 8)) - -/* VDMA_SCH : tsf_af_threshold */ -/* Description: Almost Full at 13 allocated TSF (12+8=20). In tsf24_mode should be set to 12. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__SHIFT (9) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__WIDTH (5) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__MASK (0x00003E00L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__RESET (0x00002800L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00003E00L) >> 9) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00003E00L) | (((uint32_t)(value) << 9) & 0x00003E00L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00003E00L) | 0x00003E00L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00003E00L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_SRC_DESC_TRACE : en */ -/* Description: Enable tracing of descriptors read from Source QMs */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/* CFG_SRC_DESC_TRACE : stop_on_wrap */ -/* Description: Stop when reaching end of tracing buffer */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__SHIFT (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__MASK (0x00000002L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000002L) >> 1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1)) - -/* CFG_SRC_DESC_TRACE : mprot */ -/* Description: AWPROT value */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__SHIFT (2) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__WIDTH (3) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__MASK (0x0000001CL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000001CL) >> 2) -#define 
DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL) | (((uint32_t)(value) << 2) & 0x0000001CL)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL) | 0x0000001CL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL)) - -/* CFG_SRC_DESC_TRACE : mcache */ -/* Description: AWCACHE value */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__SHIFT (5) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__WIDTH (4) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__MASK (0x000001E0L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__RESET (0x00000020L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000001E0L) >> 5) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L) | (((uint32_t)(value) << 5) & 0x000001E0L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L) | 0x000001E0L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L)) - -/* CFG_SRC_DESC_TRACE : buff_size_m1 */ -/* Description: Buffer size minus 1 in 16B descriptors */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__SHIFT (16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__MASK (0xFFFF0000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0xFFFF0000L) >> 16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L) | (((uint32_t)(value) << 16) & 0xFFFF0000L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L) | 0xFFFF0000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_SRC_DESC_TRACE_BASE_ADDR : base_addr */ -/* Description: Buffer base address bits 34:4 aligned to 16B */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__WIDTH (31) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__MASK (0x7FFFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x7FFFFFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL) | (((uint32_t)(value) << 0) & 0x7FFFFFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL) | 0x7FFFFFFFL) -#define 
DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_DST_DESC_TRACE : en */ -/* Description: Enable tracing of descriptors read from Source QMs */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/* CFG_DST_DESC_TRACE : stop_on_wrap */ -/* Description: Stop when reaching end of tracing buffer */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__SHIFT (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__MASK (0x00000002L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000002L) >> 1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1)) - -/* CFG_DST_DESC_TRACE : mprot */ -/* Description: AWPROT value */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__SHIFT (2) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__WIDTH (3) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__MASK (0x0000001CL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000001CL) >> 2) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL) | (((uint32_t)(value) << 2) & 0x0000001CL)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL) | 0x0000001CL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL)) - -/* CFG_DST_DESC_TRACE : mcache */ -/* Description: AWCACHE value. MER-3804 ECO: Note that bit 3 is double booked for timeout ExtRef default value which needs to be 1. 
In case debug tracing is enabled */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__SHIFT (5) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__WIDTH (4) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__MASK (0x000001E0L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__RESET (0x00000120L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000001E0L) >> 5) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L) | (((uint32_t)(value) << 5) & 0x000001E0L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L) | 0x000001E0L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L)) - -/* CFG_DST_DESC_TRACE : buff_size_m1 */ -/* Description: Buffer size minus 1 in 16B descriptors */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__SHIFT (16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__MASK (0xFFFF0000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0xFFFF0000L) >> 16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L) | (((uint32_t)(value) << 16) & 0xFFFF0000L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L) | 0xFFFF0000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_DST_DESC_TRACE_BASE_ADDR : base_addr */ -/* Description: Buffer base address bits 34:4 aligned to 16B. MER-3804 ECO: Note that bits 17:16 are double booked for timeout ExtRef mux. 
In case debug tracing and ExtRef are required to be turned on this constrain the base address bits 17:16 to be the same as the timestamp mux */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__WIDTH (31) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__MASK (0x7FFFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x7FFFFFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL) | (((uint32_t)(value) << 0) & 0x7FFFFFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL) | 0x7FFFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_DEBUG_TIMESTAMP : en */ -/* Description: Write 1 to enable timestamp counter for debug logic */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/* CFG_DEBUG_TIMESTAMP : clr */ -/* Description: Write 1 to clear timestamp counter. 
After writing 1 to this field need to write 0 immediately */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__SHIFT (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__MASK (0x00000002L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000002L) >> 1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1)) - -/*----------------------------------------------------------------------------------------------------*/ -/* DEBUG_TIMESTAMP : val */ -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__WIDTH (32) -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__MASK (0xFFFFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0) - -/*----------------------------------------------------------------------------------------------------*/ -/* AUTO_ADDRESS_ERR_CB_INDICATION : enable */ -/* Description: default is 1, meaning the address error is enabled, to hide the address error indication, set to 0 */ -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__RESET (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - - -#endif /* DRAM_DMA_ENGINE_CONFIG_MACRO_H */ diff --git a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_regs.h b/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_regs.h deleted file mode 100644 index 5c2c014b..00000000 --- a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_regs.h +++ /dev/null @@ -1,143 +0,0 @@ -/*------------------------------------------------------------------------------------- -// Copyright (c) 2022 by Hailotech This model is the confidential and -// proprietary property of Hailotech and the possession or use of this -// file requires a written license from Hailotech. 
--------------------------------------------------------------------------------------*/ - - - -#include - -#ifndef DRAM_DMA_ENGINE_CONFIG_REGS_H -#define DRAM_DMA_ENGINE_CONFIG_REGS_H - -#include "dram_dma_package_macros.h" -#include "dram_dma_engine_config_macros.h" - -typedef struct DRAM_DMA_ENGINE_CONFIG_regs_s { - volatile uint32_t QddcEnable[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x0 ; repeat: [16] */ - volatile uint32_t QddcReset[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x40 ; repeat: [16] */ - volatile uint32_t QddcMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x80 ; repeat: [16] */ - volatile uint32_t QddcAddBurstVal[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0xc0 ; repeat: [16] */ - volatile uint32_t QddcMaxDesc[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x100 ; repeat: [16] */ - volatile uint32_t QddcShmifoId[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x140 ; repeat: [16] */ - volatile uint32_t QddcShmifoCreditSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x180 ; repeat: [16] */ - volatile uint32_t QddcShmifoInitCredit[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x1c0 ; repeat: [16] */ - volatile uint32_t QsdcEnable[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x200 ; repeat: [16] */ - volatile uint32_t QsdcReset[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x240 ; repeat: [16] */ - volatile uint32_t QsdcMaxDesc[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x280 ; repeat: [16] */ - volatile uint32_t QsdcShmifoId[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x2c0 ; repeat: [16] */ - volatile uint32_t QsdcShmifoCreditSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x300 ; repeat: [16] */ - volatile uint32_t QsdcFullNumPatterns[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN]; /* offset: 0x340 ; repeat: [4] */ - volatile uint32_t QsdcFullPatternNumLines[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN][DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS];/* offset: 0x350 ; repeat: [4, 4] */ - volatile uint32_t QsdcFullPatternNumPages[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN][DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS];/* offset: 0x390 ; repeat: [4, 4] */ - volatile uint32_t QsdcFullPatternPageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN][DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS];/* offset: 0x3d0 ; repeat: [4, 4] */ - volatile uint32_t QsdcFullPatternResiduePageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN][DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS];/* offset: 0x410 ; repeat: [4, 4] */ - volatile uint32_t QsdcSimpPatternNumPages[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SIMP_PATTERN]; /* offset: 0x450 ; repeat: [12] */ - volatile uint32_t QsdcSimpPatternPageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SIMP_PATTERN]; /* offset: 0x480 ; repeat: [12] */ - volatile uint32_t QsdcSimpPatternResiduePageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SIMP_PATTERN]; /* offset: 0x4b0 ; repeat: [12] */ - volatile uint32_t QdmcEnable[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x4e0 ; repeat: [16] */ - volatile uint32_t QdmcReset[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x520 ; repeat: [16] */ - volatile uint32_t QdmcMemBaseAddr[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x560 ; repeat: [16] */ - volatile uint32_t QdmcMemCcbSizeLog2[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_REGULAR_CH]; /* offset: 0x5a0 ; repeat: [12] */ - volatile 
uint32_t QdmcDescCsInterrupt[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x5d0 ; repeat: [16] */ - volatile uint32_t QdmcBankInterleaveMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x610 ; repeat: [16] */ - volatile uint32_t QdmcMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x650 ; repeat: [4] */ - volatile uint32_t QdmcAddBurstVal[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x660 ; repeat: [4] */ - volatile uint32_t QdmcMemCcbSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x670 ; repeat: [4] */ - volatile uint32_t QdmcDescPeriphInterrupt[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x680 ; repeat: [4] */ - volatile uint32_t QdmcCcbProcessedIndex[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x690 ; repeat: [4] */ - volatile uint32_t QsmcEnable[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x6a0 ; repeat: [16] */ - volatile uint32_t QsmcReset[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x6e0 ; repeat: [16] */ - volatile uint32_t QsmcMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x720 ; repeat: [16] */ - volatile uint32_t QsmcC2cSel[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x760 ; repeat: [16] */ - volatile uint32_t QsmcAddBurstVal[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x7a0 ; repeat: [16] */ - volatile uint32_t QsmcMemBaseAddr[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x7e0 ; repeat: [16] */ - volatile uint32_t QsmcMemCcbSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x820 ; repeat: [16] */ - volatile uint32_t QsmcPageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x860 ; repeat: [16] */ - volatile uint32_t QsmcSimpPatternNumPages[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x8a0 ; repeat: [16] */ - volatile uint32_t QsmcSimpPatternResiduePageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x8e0 ; repeat: [16] */ - volatile uint32_t QsmcBankInterleaveMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x920 ; repeat: [16] */ - volatile uint32_t QsmcDescPeriphInterrupt[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x960 ; repeat: [4] */ - volatile uint32_t QsmcCcbFreeIndex[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x970 ; repeat: [4] */ - volatile uint32_t engine_cs_intr_mask; /* offset: 0x980 ; repeat: [1] */ - volatile uint32_t engine_cs_intr_status; /* offset: 0x984 ; repeat: [1] */ - volatile uint32_t engine_cs_intr_w1c; /* offset: 0x988 ; repeat: [1] */ - volatile uint32_t engine_cs_intr_w1s; /* offset: 0x98c ; repeat: [1] */ - volatile uint32_t engine_ap_intr_mask; /* offset: 0x990 ; repeat: [1] */ - volatile uint32_t engine_ap_intr_status; /* offset: 0x994 ; repeat: [1] */ - volatile uint32_t engine_ap_intr_w1c; /* offset: 0x998 ; repeat: [1] */ - volatile uint32_t engine_ap_intr_w1s; /* offset: 0x99c ; repeat: [1] */ - volatile uint32_t engine_dsp_intr_mask; /* offset: 0x9a0 ; repeat: [1] */ - volatile uint32_t engine_dsp_intr_status; /* offset: 0x9a4 ; repeat: [1] */ - volatile uint32_t engine_dsp_intr_w1c; /* offset: 0x9a8 ; repeat: [1] */ - volatile uint32_t engine_dsp_intr_w1s; /* offset: 0x9ac ; repeat: [1] */ - volatile uint32_t engine_err_intr_mask; /* offset: 0x9b0 ; repeat: [1] */ - volatile uint32_t engine_err_intr_status; /* offset: 0x9b4 ; repeat: [1] */ - volatile uint32_t desc_err_intr_mask; /* offset: 0x9b8 ; repeat: [1] */ - volatile uint32_t 
desc_err_intr_status; /* offset: 0x9bc ; repeat: [1] */ - volatile uint32_t desc_err_intr_w1c; /* offset: 0x9c0 ; repeat: [1] */ - volatile uint32_t desc_err_intr_w1s; /* offset: 0x9c4 ; repeat: [1] */ - volatile uint32_t qddc_crd_ovf_err_intr_mask; /* offset: 0x9c8 ; repeat: [1] */ - volatile uint32_t qddc_crd_ovf_err_intr_status; /* offset: 0x9cc ; repeat: [1] */ - volatile uint32_t qddc_crd_ovf_err_intr_w1c; /* offset: 0x9d0 ; repeat: [1] */ - volatile uint32_t qddc_crd_ovf_err_intr_w1s; /* offset: 0x9d4 ; repeat: [1] */ - volatile uint32_t qsdc_crd_ovf_err_intr_mask; /* offset: 0x9d8 ; repeat: [1] */ - volatile uint32_t qsdc_crd_ovf_err_intr_status; /* offset: 0x9dc ; repeat: [1] */ - volatile uint32_t qsdc_crd_ovf_err_intr_w1c; /* offset: 0x9e0 ; repeat: [1] */ - volatile uint32_t qsdc_crd_ovf_err_intr_w1s; /* offset: 0x9e4 ; repeat: [1] */ - volatile uint32_t EngErrInterruptSource; /* offset: 0x9e8 ; repeat: [1] */ - volatile uint32_t EngErrRemainPageSize; /* offset: 0x9ec ; repeat: [1] */ - volatile uint32_t EngTransferPageSize; /* offset: 0x9f0 ; repeat: [1] */ - volatile uint32_t VdmaSoftReset; /* offset: 0x9f4 ; repeat: [1] */ - volatile uint32_t vdma_sharedbus; /* offset: 0x9f8 ; repeat: [1] */ - volatile uint32_t cfg_qddc_redundant_en; /* offset: 0x9fc ; repeat: [1] */ - volatile uint32_t cfg_qsdc_redundant_en; /* offset: 0xa00 ; repeat: [1] */ - volatile uint32_t cfg_qdmc_redundant_en; /* offset: 0xa04 ; repeat: [1] */ - volatile uint32_t cfg_qsmc_redundant_en; /* offset: 0xa08 ; repeat: [1] */ - volatile uint32_t qddc_redundant_asf_int_mask; /* offset: 0xa0c ; repeat: [1] */ - volatile uint32_t qddc_redundant_asf_int_status; /* offset: 0xa10 ; repeat: [1] */ - volatile uint32_t qddc_redundant_asf_int_w1c; /* offset: 0xa14 ; repeat: [1] */ - volatile uint32_t qddc_redundant_asf_int_w1s; /* offset: 0xa18 ; repeat: [1] */ - volatile uint32_t qsdc_redundant_asf_int_mask; /* offset: 0xa1c ; repeat: [1] */ - volatile uint32_t qsdc_redundant_asf_int_status; /* offset: 0xa20 ; repeat: [1] */ - volatile uint32_t qsdc_redundant_asf_int_w1c; /* offset: 0xa24 ; repeat: [1] */ - volatile uint32_t qsdc_redundant_asf_int_w1s; /* offset: 0xa28 ; repeat: [1] */ - volatile uint32_t qdmc_redundant_asf_int_mask; /* offset: 0xa2c ; repeat: [1] */ - volatile uint32_t qdmc_redundant_asf_int_status; /* offset: 0xa30 ; repeat: [1] */ - volatile uint32_t qdmc_redundant_asf_int_w1c; /* offset: 0xa34 ; repeat: [1] */ - volatile uint32_t qdmc_redundant_asf_int_w1s; /* offset: 0xa38 ; repeat: [1] */ - volatile uint32_t qsmc_redundant_asf_int_mask; /* offset: 0xa3c ; repeat: [1] */ - volatile uint32_t qsmc_redundant_asf_int_status; /* offset: 0xa40 ; repeat: [1] */ - volatile uint32_t qsmc_redundant_asf_int_w1c; /* offset: 0xa44 ; repeat: [1] */ - volatile uint32_t qsmc_redundant_asf_int_w1s; /* offset: 0xa48 ; repeat: [1] */ - volatile uint32_t PrioIsLp; /* offset: 0xa4c ; repeat: [1] */ - volatile uint32_t ReadLpToQosValue; /* offset: 0xa50 ; repeat: [1] */ - volatile uint32_t ReadHpToQosValue; /* offset: 0xa54 ; repeat: [1] */ - volatile uint32_t WriteLpToQosValue; /* offset: 0xa58 ; repeat: [1] */ - volatile uint32_t WriteHpToQosValue; /* offset: 0xa5c ; repeat: [1] */ - volatile uint32_t DescReadQosValue; /* offset: 0xa60 ; repeat: [1] */ - volatile uint32_t DescWriteQosValue; /* offset: 0xa64 ; repeat: [1] */ - volatile uint32_t vdma_arb; /* offset: 0xa68 ; repeat: [1] */ - volatile uint32_t qm_cfg_cg_delay; /* offset: 0xa6c ; repeat: [1] */ - volatile uint32_t qddc_cfg_cg_bypass; /* offset: 0xa70 ; 
repeat: [1] */ - volatile uint32_t qsdc_cfg_cg_bypass; /* offset: 0xa74 ; repeat: [1] */ - volatile uint32_t qdmc_cfg_cg_bypass; /* offset: 0xa78 ; repeat: [1] */ - volatile uint32_t qsmc_cfg_cg_bypass; /* offset: 0xa7c ; repeat: [1] */ - volatile uint32_t engine_asf_int_mask; /* offset: 0xa80 ; repeat: [1] */ - volatile uint32_t engine_asf_int_status; /* offset: 0xa84 ; repeat: [1] */ - volatile uint32_t engine_asf_int_w1c; /* offset: 0xa88 ; repeat: [1] */ - volatile uint32_t engine_asf_int_w1s; /* offset: 0xa8c ; repeat: [1] */ - volatile uint32_t engine_rw_parity_bist_mode; /* offset: 0xa90 ; repeat: [1] */ - volatile uint32_t vdma_stop_lp; /* offset: 0xa94 ; repeat: [1] */ - volatile uint32_t vdma_sch; /* offset: 0xa98 ; repeat: [1] */ - volatile uint32_t cfg_src_desc_trace; /* offset: 0xa9c ; repeat: [1] */ - volatile uint32_t cfg_src_desc_trace_base_addr; /* offset: 0xaa0 ; repeat: [1] */ - volatile uint32_t cfg_dst_desc_trace; /* offset: 0xaa4 ; repeat: [1] */ - volatile uint32_t cfg_dst_desc_trace_base_addr; /* offset: 0xaa8 ; repeat: [1] */ - volatile uint32_t cfg_debug_timestamp; /* offset: 0xaac ; repeat: [1] */ - volatile uint32_t debug_timestamp; /* offset: 0xab0 ; repeat: [1] */ - volatile uint32_t auto_address_err_cb_indication; /* offset: 0xab4 ; repeat: [1] */ -} DRAM_DMA_ENGINE_CONFIG_t; - -#endif /* DRAM_DMA_ENGINE_CONFIG_REGS_H */ diff --git a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_package_macros.h b/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_package_macros.h deleted file mode 100644 index 705f9a50..00000000 --- a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_package_macros.h +++ /dev/null @@ -1,92 +0,0 @@ -/*------------------------------------------------------------------------------------- -// Copyright (c) 2022 by Hailotech This model is the confidential and -// proprietary property of Hailotech and the possession or use of this -// file requires a written license from Hailotech. 
--------------------------------------------------------------------------------------*/ - - - -#include - -#ifndef DRAM_DMA_PACKAGE_MACROS_H -#define DRAM_DMA_PACKAGE_MACROS_H - -/* HW constants and parameters for package "dram_dma" */ - -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_AXI_QOS_BITS (3) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_CH (32) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_CH_RX_CREDIT (4096) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_CH_TX_CREDIT (2048) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DD_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DM_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_PATTERNS_MAX_LINES (262144) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_PATTERNS_MAX_PAGES (262144) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_REGULAR_CH (12) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_RX_SHMIFO (24) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SD_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SIMP_PATTERN (12) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SM_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SW_CH (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SW_INT (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_TX_SHMIFO (20) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__PAGE_SIZE_MAX (13) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__PAGE_SIZE_MAX_8B (10) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_BURST_SIZE (29) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_BURST_SIZE_8B (26) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_C2C_SEL (6) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CCB_DESC_INDEX (18) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CCB_DESC_INDEX_LOG (5) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CFG_DATA (32) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CH (5) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CH_CREDIT_SIZE (10) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CH_RX_CREDIT (13) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CH_TX_CREDIT (12) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CORE_ADDR (35) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CORE_BASE_ADDR (29) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CSR_CFG_ADDR (13) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DDR_ADDR (35) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DDR_BASE_ADDR (26) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DD_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DESC_DEMUX_ADDR (43) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DIR_CH (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DM_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_ENG_CFG_ADDR (14) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_MAX_PATTERNS (2) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_PATTERNS_MAX_LINES (18) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_PATTERNS_MAX_PAGES (18) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_SD_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_SHMIFO (5) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_SM_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_SW_CH (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_ADDR (64) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_DATA_DATA (64) -#define 
DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_DATA_DESC (128) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_ID_DATA0 (2) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_ID_DATA1 (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_ID_DESC (3) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_CFG_ADDR (10) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_MEM_ADDR (5) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_MEM_DATA (256) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__ADDR_ALLSTRB_OFFSET (56) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__ADDR_APCMD_OFFSET (55) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__FPGA_N_HW_DMA_ENG (0) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_DESC_AXI (1) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_DMA_ENG (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_HMASTER (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_HW_DMA_ENG (3) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_SW_DMA_ENG (1) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_TOT_DMA_DIR_CH (48) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_VISION_CH (10) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__W_CFG_ADDR (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__W_CFG_DATA (32) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__W_CSR_CFG_ADDR (12) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__W_TOT_DMA_DIR_CH (6) - - -#endif /* DRAM_DMA_PACKAGE_MACROS_H */ diff --git a/hailort/tools/hw_debug/main.cpp b/hailort/tools/hw_debug/main.cpp deleted file mode 100644 index dc9ecbea..00000000 --- a/hailort/tools/hw_debug/main.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/** - * @file main.cpp - * @brief Main function, and shell build for the tool. - */ - -#include "shell.hpp" -#include "readline_wrapper.hpp" -#include "memory_commands.hpp" -#include "driver_memory.hpp" - -#include "CLI/CLI.hpp" - -#include - -using namespace hailort; - -static constexpr const char *LOGO = - R"( _____ _ _ __ __ )" "\n" - R"(| __ \ | | | | \ \ / / )" "\n" - R"(| | | | ___| |__ __ _| | ___ \ V / )" "\n" - R"(| | | |/ _ \ '_ \ / _` | |/ _ \ > < )" "\n" - R"(| |__| | __/ |_) | (_| | | __// . 
\ )" "\n" - R"(|_____/ \___|_.__/ \__,_|_|\___/_/ \_\ )" "\n"; - - -static std::shared_ptr add_memory_subshell(Shell &base_shell, - const std::string &name, const std::string &short_name, std::shared_ptr mem) -{ - auto subshell = base_shell.add_subshell(name, short_name); - subshell->add_command(std::make_unique>(mem)); - subshell->add_command(std::make_unique>(mem)); - subshell->add_command(std::make_unique>(mem)); - - subshell->add_command(std::make_unique>(mem)); - subshell->add_command(std::make_unique>(mem)); - subshell->add_command(std::make_unique>(mem)); - - if (!mem->get_fields().empty()) { - subshell->add_command(std::make_unique(mem)); - } - - return subshell; -} - -template -static std::shared_ptr add_driver_memory_subshell(Shell &base_shell, - const std::string &name, const std::string &short_name, - std::shared_ptr driver, MemoryType memory_type) -{ - auto mem = std::make_shared(driver, memory_type); - return add_memory_subshell(base_shell, name, short_name, mem); -} - -static std::unique_ptr create_pcie_accelerator_shell(std::shared_ptr driver_ptr) -{ - auto shell = std::make_unique("> "); - add_driver_memory_subshell(*shell, "vdma", "v", driver_ptr, MemoryType::VDMA0); - add_driver_memory_subshell(*shell, "bar0", "b0", driver_ptr, MemoryType::PCIE_BAR0); - add_driver_memory_subshell(*shell, "bar2", "b2", driver_ptr, MemoryType::PCIE_BAR2); - add_driver_memory_subshell(*shell, "bar4", "b4", driver_ptr, MemoryType::PCIE_BAR4); - add_driver_memory_subshell(*shell, "mem", "m", driver_ptr, MemoryType::DIRECT_MEMORY); - return shell; -} - -static std::unique_ptr create_vpu_shell(std::shared_ptr driver_ptr) -{ - auto shell = std::make_unique("> "); - add_driver_memory_subshell(*shell, "vdma0", "v0", driver_ptr, MemoryType::VDMA0); - add_driver_memory_subshell(*shell, "vdma1", "v1", driver_ptr, MemoryType::VDMA1); - add_driver_memory_subshell(*shell, "vdma2", "v2", driver_ptr, MemoryType::VDMA2); - add_driver_memory_subshell(*shell, "engine0", "e0", driver_ptr, MemoryType::DMA_ENGINE0); - add_driver_memory_subshell(*shell, "engine1", "e1", driver_ptr, MemoryType::DMA_ENGINE1); - add_driver_memory_subshell(*shell, "engine2", "e2", driver_ptr, MemoryType::DMA_ENGINE2); - add_driver_memory_subshell(*shell, "mem", "m", driver_ptr, MemoryType::DIRECT_MEMORY); - return shell; -} - -static std::vector get_available_device_ids() -{ - auto scan_results = HailoRTDriver::scan_devices(); - if (!scan_results) { - throw std::runtime_error("Failed scan pci"); - } - if (scan_results->empty()) { - throw std::runtime_error("No hailo devices on the system..."); - } - - std::vector device_ids; - for (const auto &scan_result : scan_results.value()) { - device_ids.push_back(scan_result.device_id); - } - return device_ids; -} - -HailoRTDriver::DeviceInfo get_device_info(const std::string &device_id) -{ - auto scan_results = HailoRTDriver::scan_devices(); - if (!scan_results) { - throw std::runtime_error("Failed scan pci"); - } - - auto device_found = std::find_if(scan_results->cbegin(), scan_results->cend(), - [&device_id](const auto &compared_scan_result) { - return device_id == compared_scan_result.device_id; - }); - if (device_found == std::end(scan_results.value())) { - throw std::runtime_error("Requested device not found"); - } - - return *device_found; -} - -std::shared_ptr create_driver_object(const std::string &device_id) -{ - auto device_info = get_device_info(device_id); - auto hailort_driver = HailoRTDriver::create(device_info); - if (!hailort_driver) { - throw std::runtime_error("Failed 
create hailort driver object"); - } - return hailort_driver.release(); -} - -int main(int argc, char **argv) -{ - try { - ReadLineWrapper::init_library(); - - auto available_device_ids = get_available_device_ids(); - - CLI::App app{"Debalex"}; - std::string device_id = available_device_ids[0]; - app.add_option("-s,--device-id", device_id, "Device id") - ->check(CLI::IsMember(available_device_ids)); - CLI11_PARSE(app, argc, argv); - - auto driver = create_driver_object(device_id); - - - auto shell = - driver->dma_type() == HailoRTDriver::DmaType::PCIE ? - create_pcie_accelerator_shell(driver) : - create_vpu_shell(driver); - - std::cout << LOGO << std::endl; - shell->run_forever(); - return 0; - } - catch (const std::exception &exc) { - std::cerr << "Failure: " << exc.what(); - return 1; - } -} diff --git a/hailort/tools/hw_debug/memory_commands.cpp b/hailort/tools/hw_debug/memory_commands.cpp deleted file mode 100644 index b753503f..00000000 --- a/hailort/tools/hw_debug/memory_commands.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/** - * @file memory_commands.cpp - * @brief Commands to access (read/write) some memory (for example - channel registers, descriptors, physical, etc.) - */ -#include "memory_commands.hpp" - -#include -#include - -Field::Field(std::string &&name, std::string &&description) : - m_name(std::move(name)), - m_description(std::move(description)) -{} - - -const std::string &Field::name() const -{ - return m_name; -} - -const std::string &Field::description() const -{ - return m_description; -} - -const std::map> &MemorySource::get_fields() const -{ - return m_fields; -} - -void MemorySource::add_field(std::shared_ptr field) -{ - assert(m_fields.find(field->name()) == m_fields.end()); - m_fields[field->name()] = field; -} - -constexpr size_t PrintCommand::PRINT_ALL; - -PrintCommand::PrintCommand(std::shared_ptr memory) : - ShellCommand("print", "p", get_help(memory->get_fields())), - m_memory(memory) -{} - -ShellResult PrintCommand::execute(const std::vector &args) -{ - if (args.size() != 1) { - return ShellResult("Invalid params\n"); - } - - std::string field_name{}; - size_t index{}; - std::tie(field_name, index) = parse_field(args[0]); - - const auto &fields = m_memory->get_fields(); - auto field_it = fields.find(field_name); - if (fields.end() == field_it) { - throw std::runtime_error(fmt::format("Field {} does not exist", field_name)); - } - const auto &field = field_it->second; - - if (index == PRINT_ALL) { - std::vector results; - results.reserve(field->elements_count()); - for (size_t i = 0; i < field->elements_count(); i++) { - results.emplace_back(ShellResult(field->print_element(*m_memory, i))); - } - return ShellResult(results); - } - else { - if (index >= field->elements_count()) { - throw std::runtime_error(fmt::format("Index {} is out of range (max {})", index, field->elements_count())); - } - return ShellResult(field->print_element(*m_memory, index)); - } -} - -std::pair PrintCommand::parse_field(const std::string &field_arg) -{ - static const std::regex field_name_pattern("([a-zA-Z]+)"); - static const std::regex array_access_pattern("([a-zA-Z]+)\\[([0-9]+)\\]"); - std::smatch match; - - if (std::regex_match(field_arg, match, field_name_pattern)) { - assert(match.size() == 2); - const auto field = match[1]; - return std::make_pair(field, PRINT_ALL); - } - else if (std::regex_match(field_arg, match, array_access_pattern)) { - assert(match.size() == 3); - const auto &field = match[1]; - const auto index = std::atoi(match[2].str().c_str()); - return 
std::make_pair(field, index); - } - else { - throw std::runtime_error(fmt::format("Invalid syntax {}", field_arg)); - } -} - -std::string PrintCommand::get_help(const std::map> &fields) -{ - std::string help = "Pretty print some field, usage: print []. Fields:\n"; - for (auto field : fields) { - help += fmt::format("\t{} - {}\n", field.second->name(), field.second->description()); - } - return help; -} diff --git a/hailort/tools/hw_debug/memory_commands.hpp b/hailort/tools/hw_debug/memory_commands.hpp deleted file mode 100644 index 95dc913f..00000000 --- a/hailort/tools/hw_debug/memory_commands.hpp +++ /dev/null @@ -1,211 +0,0 @@ -/** - * @file memory_commands.hpp - * @brief Commands to access (read/write) some memory (for example - channel registers, descriptors, physical, etc.) - */ - -#ifndef _HW_DEBUG_MEMORY_COMMANDS_H_ -#define _HW_DEBUG_MEMORY_COMMANDS_H_ - -#include "shell.hpp" -#include "hailo/hailort.h" -#include "hailo/expected.hpp" -#include "spdlog/fmt/fmt.h" - -#include -#include -#include -#include - -class MemorySource; - -class Field { -public: - explicit Field(std::string &&name, std::string &&description); - virtual ~Field() = default; - - Field(const Field &other) = delete; - Field &operator=(const Field &other) = delete; - - const std::string &name() const; - const std::string &description() const; - - virtual size_t elements_count() const = 0; - virtual std::string print_element(MemorySource& memory, size_t index) const = 0; -private: - const std::string m_name; - const std::string m_description; -}; - -class MemorySource { -public: - virtual ~MemorySource() = default; - - virtual hailo_status read(uint64_t offset, uint8_t *data, size_t size) = 0; - virtual hailo_status write(uint64_t offset, const uint8_t *data, size_t size) = 0; - virtual size_t total_size() const = 0; - - template - T read(uint64_t offset) - { - static_assert(std::is_trivial::value, "Non trivial type"); - T value{}; - auto status = read(offset, reinterpret_cast(&value), sizeof(value)); - if (HAILO_SUCCESS != status) { - throw std::runtime_error(fmt::format("Failed read at {} (size {})", offset, sizeof(value))); - } - return value; - } - - const std::map> &get_fields() const; -protected: - void add_field(std::shared_ptr field); - -private: - std::map> m_fields; -}; - -template -class MemoryWriteCommand : public ShellCommand { -public: - static_assert(std::is_integral::value, "MemoryWriteCommand works only with integers"); - - MemoryWriteCommand(std::shared_ptr memory) : - ShellCommand(get_name(), get_short_name(), get_help()), - m_memory(memory) - {} - - ShellResult execute(const std::vector &args) { - if (args.size() != 2) { - return ShellResult("Invalid params\n"); - } - - uint64_t offset; - if (sscanf(args[0].c_str(), "%" SCNx64, &offset) != 1) { - return ShellResult(fmt::format("Invalid offset {}\n")); - } - - uint32_t data; - if (sscanf(args[1].c_str(), "%" SCNx32, &data) != 1) { - return ShellResult(fmt::format("Invalid data {}\n", args[1])); - } - - if ((offset % sizeof(IntType)) != 0) { - return ShellResult(fmt::format("Offset {:x} must be a multiple of {}\n", offset, sizeof(IntType))); - } - - if (offset + sizeof(IntType) > m_memory->total_size()) { - return ShellResult(fmt::format("Offset {:x} too large (max {:x})\n", offset, m_memory->total_size())); - } - - if (data > std::numeric_limits::max()) { - return ShellResult(fmt::format("data {:x} too large\n", data)); - } - IntType data_as_int = static_cast(data); - auto status = m_memory->write(offset, reinterpret_cast(&data_as_int), 
-        if (HAILO_SUCCESS != status) {
-            return ShellResult(fmt::format("Failed write memory {}\n", status));
-        }
-
-        return ShellResult("");
-    }
-
-private:
-    std::shared_ptr<MemorySource> m_memory;
-
-    static size_t get_bits() { return sizeof(IntType) * 8; }
-    static std::string get_name() { return fmt::format("write{}", get_bits()); }
-    static std::string get_short_name() { return fmt::format("w{}", get_bits()); }
-    static std::string get_help()
-    {
-        return fmt::format("Writes memory in {} granularity. Usage: {} <offset> <data>. Offset and data are hex integers.", get_bits(),
-            get_name());
-    }
-};
-
-template <typename IntType>
-class MemoryReadCommand : public ShellCommand {
-public:
-    static_assert(std::is_integral<IntType>::value, "MemoryReadCommand works only with integers");
-
-    MemoryReadCommand(std::shared_ptr<MemorySource> memory) :
-        ShellCommand(get_name(), get_short_name(), get_help()),
-        m_memory(memory)
-    {}
-
-    ShellResult execute(const std::vector<std::string> &args) {
-        if (args.size() != 2) {
-            return ShellResult("Invalid params\n");
-        }
-
-        uint64_t offset;
-        if (sscanf(args[0].c_str(), "%" SCNx64, &offset) != 1) {
-            return ShellResult(fmt::format("Invalid offset {}\n", args[0]));
-        }
-
-        uint32_t size;
-        if (sscanf(args[1].c_str(), "%" SCNx32, &size) != 1) {
-            return ShellResult(fmt::format("Invalid size {}\n", args[1]));
-        }
-
-        if ((offset % sizeof(IntType)) != 0) {
-            return ShellResult(fmt::format("Offset {:x} must be a multiple of {}\n", offset, sizeof(IntType)));
-        }
-
-        if ((size % sizeof(IntType)) != 0) {
-            return ShellResult(fmt::format("Size {:x} must be a multiple of {}\n", size, sizeof(IntType)));
-        }
-
-        if (offset + size > m_memory->total_size()) {
-            return ShellResult(fmt::format("Offset {:x} and size {:x} too large (max {:x})\n", offset, size,
-                m_memory->total_size()));
-        }
-
-        std::vector<uint8_t> data(size, 0);
-        auto status = m_memory->read(offset, data.data(), data.size());
-        if (HAILO_SUCCESS != status) {
-            return ShellResult(fmt::format("Failed read memory {}\n", status));
-        }
-
-        std::stringstream result;
-        result << std::hex << std::setfill('0');
-        for (size_t i = 0; i < size; i += sizeof(IntType)) {
-            if ((i % 16) == 0) {
-                // Print address
-                result << std::endl << std::setw(8) << (offset + i) << "\t";
-            }
-            IntType *ptr = reinterpret_cast<IntType *>(data.data() + i);
-            result << " " << std::setw(sizeof(IntType) * 2) << static_cast<uint64_t>(*ptr);
-        }
-        result << std::endl;
-        return result.str();
-    }
-
-private:
-    std::shared_ptr<MemorySource> m_memory;
-
-    static size_t get_bits() { return sizeof(IntType) * 8; }
-    static std::string get_name() { return fmt::format("read{}", get_bits()); }
-    static std::string get_short_name() { return fmt::format("r{}", get_bits()); }
-    static std::string get_help()
-    {
-        return fmt::format("Reads memory in {} granularity. Usage: {} <offset> <size>. Offset and size are hex integers.",
-            get_bits(), get_name());
-    }
-};
-
-class PrintCommand : public ShellCommand {
-public:
-    PrintCommand(std::shared_ptr<MemorySource> memory);
-    virtual ShellResult execute(const std::vector<std::string> &args) override;
-
-private:
-    // Returns a pair of the field name and the index
-    static std::pair<std::string, size_t> parse_field(const std::string &field_arg);
-    static std::string get_help(const std::map<std::string, std::shared_ptr<Field>> &fields);
-
-    std::shared_ptr<MemorySource> m_memory;
-
-    static constexpr size_t PRINT_ALL = std::numeric_limits<size_t>::max();
-};
-
-#endif /* _HW_DEBUG_MEMORY_COMMANDS_H_ */
diff --git a/hailort/tools/hw_debug/readline_wrapper.cpp b/hailort/tools/hw_debug/readline_wrapper.cpp
deleted file mode 100644
index 03ab84ce..00000000
--- a/hailort/tools/hw_debug/readline_wrapper.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- * @file readline_wrapper.cpp
- * @brief Wrapper for the readline library: either uses the library or falls back to a simple implementation.
- */
-
-#include "readline_wrapper.hpp"
-#include <iostream>
-
-
-#ifdef USE_READLINE
-#include <readline/readline.h>
-#include <readline/history.h>
-#include <signal.h>
-
-static void int_handler(int)
-{
-    printf("\n");           // Move to a new line
-    rl_on_new_line();       // Regenerate the prompt on a newline
-    rl_replace_line("", 0); // Clear the previous text
-    rl_redisplay();
-}
-
-static ReadLineWrapper::AutoCompleter g_auto_completer = nullptr;
-
-static char *name_generator(const char *text, int index)
-{
-    if (!g_auto_completer) {
-        return nullptr;
-    }
-
-    auto results = g_auto_completer(std::string(text));
-    if (static_cast<size_t>(index) >= results.size()) {
-        return nullptr;
-    }
-
-    return strdup(results[index].c_str());
-}
-
-static char **name_completion(const char *text, int start, int)
-{
-    if (start > 0) {
-        // We use autocomplete only for the first arg (command name).
-        return nullptr;
-    }
-
-    rl_attempted_completion_over = 1;
-    return rl_completion_matches(text, name_generator);
-}
-
-void ReadLineWrapper::init_library()
-{
-    rl_attempted_completion_function = name_completion;
-    signal(SIGINT, int_handler);
-}
-
-std::string ReadLineWrapper::readline(const std::string &prompt)
-{
-    auto line_raw = ::readline(prompt.c_str());
-    if (line_raw == nullptr) {
-        // Handle Ctrl+D (EOF)
-        printf("\n");
-        return "";
-    }
-
-    const std::string line(line_raw);
-    free(line_raw);
-    return line;
-}
-
-void ReadLineWrapper::add_history(const std::string &line)
-{
-    ::add_history(line.c_str());
-}
-
-void ReadLineWrapper::set_auto_completer(AutoCompleter completer)
-{
-    g_auto_completer = completer;
-}
-
-void ReadLineWrapper::remove_auto_completer()
-{
-    g_auto_completer = nullptr;
-}
-
-#else
-
-void ReadLineWrapper::init_library()
-{}
-
-// Non readline implementation
-std::string ReadLineWrapper::readline(const std::string &prompt)
-{
-    std::cout << prompt;
-    std::string command;
-    std::getline(std::cin, command);
-    return command;
-}
-
-void ReadLineWrapper::add_history(const std::string &)
-{
-    // No history, just NOP.
-}
-
-void ReadLineWrapper::set_auto_completer(AutoCompleter)
-{}
-
-void ReadLineWrapper::remove_auto_completer()
-{}
-
-#endif
\ No newline at end of file
diff --git a/hailort/tools/hw_debug/readline_wrapper.hpp b/hailort/tools/hw_debug/readline_wrapper.hpp
deleted file mode 100644
index eae25c82..00000000
--- a/hailort/tools/hw_debug/readline_wrapper.hpp
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * @file readline_wrapper.hpp
- * @brief Wrapper for the readline library: either uses the library or falls back to a simple implementation.
- */
-
-#ifndef _HW_DEBUG_READLINE_WRAPPER_H_
-#define _HW_DEBUG_READLINE_WRAPPER_H_
-
-#include <functional>
-#include <string>
-#include <vector>
-
-class ReadLineWrapper final {
-public:
-    ReadLineWrapper() = delete;
-
-    static void init_library();
-    static std::string readline(const std::string &prompt);
-    static void add_history(const std::string &line);
-
-    using AutoCompleter = std::function<std::vector<std::string>(const std::string &text)>;
-    static void set_auto_completer(AutoCompleter completer);
-    static void remove_auto_completer();
-};
-
-#endif /* _HW_DEBUG_READLINE_WRAPPER_H_ */
\ No newline at end of file
diff --git a/hailort/tools/hw_debug/shell.cpp b/hailort/tools/hw_debug/shell.cpp
deleted file mode 100644
index ee5ca38b..00000000
--- a/hailort/tools/hw_debug/shell.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-/**
- * @file shell.cpp
- * @brief Generic shell - contains commands and sub-shells. The shell implements
- *        a parse-execute command loop.
- */
-#include "shell.hpp"
-#include "readline_wrapper.hpp"
-#include "spdlog/fmt/fmt.h"
-
-#include <algorithm>
-#include <cassert>
-#include <iostream>
-#include <tuple>
-
-static std::vector<std::string> split_string(std::string s, const std::string &delimiter = " ")
-{
-    std::vector<std::string> parts;
-    auto pos = std::string::npos;
-    while ((pos = s.find(delimiter)) != std::string::npos) {
-        parts.push_back(s.substr(0, pos));
-        s.erase(0, pos + delimiter.size());
-    }
-    parts.push_back(s);
-    return parts;
-}
-
-ShellCommand::ShellCommand(const std::string &name, const std::string &short_name, const std::string &help) :
-    m_name(name),
-    m_short_name(short_name),
-    m_help(help)
-{}
-
-Shell::Shell(const std::string &prompt) :
-    m_prompt(prompt),
-    m_commands(),
-    m_should_quit(false)
-{
-    add_command(std::make_unique<Help>(*this));
-    add_command(std::make_unique<Quit>(*this));
-}
-
-void Shell::add_command(std::unique_ptr<ShellCommand> shell_command)
-{
-    assert(nullptr == get_command_by_name(shell_command->name()));
-    assert(nullptr == get_command_by_name(shell_command->short_name()));
-
-    m_commands.emplace_back(std::move(shell_command));
-}
-
-std::shared_ptr<Shell> Shell::add_subshell(const std::string &name, const std::string &short_name)
-{
-    auto subshell_cmd = std::make_unique<StartSubshellCommand>(name, short_name,
-        fmt::format("Start {} subshell", name));
-    auto shell = subshell_cmd->get_shell();
-    add_command(std::move(subshell_cmd));
-    return shell;
-}
-
-void Shell::run_forever()
-{
-    ReadLineWrapper::set_auto_completer([this](const std::string &text) {
-        return autocomplete(text);
-    });
-
-    std::cout << get_help() << std::endl;
-    while (!m_should_quit) {
-        std::string name;
-        std::vector<std::string> args;
-        std::tie(name, args) = ask_user_command();
-
-        auto cmd = get_command_by_name(name);
-        if (cmd == nullptr) {
-            std::cout << fmt::format("Command {} not found...", name) << std::endl;
-            continue;
-        }
-
-        try {
-            auto cmd_result = cmd->execute(args);
-            cmd_result.print(std::cout);
-        } catch (const std::runtime_error &exc) {
-            std::cerr << fmt::format("Error: {}", exc.what()) << std::endl;
-        }
-    }
-
-    ReadLineWrapper::remove_auto_completer();
-
-    // Disable quit for next run
-    m_should_quit = false;
-}
-
-std::vector<std::string> Shell::autocomplete(const std::string &text)
-{
-    std::vector<std::string> names;
-    for (const auto &cmd : m_commands) {
-        if (text.empty() || (cmd->name().rfind(text, 0) == 0)) {
-            names.emplace_back(cmd->name());
-        }
-
-        if (text.empty() || (cmd->short_name().rfind(text, 0) == 0)) {
-            names.emplace_back(cmd->short_name());
-        }
-    }
-
-    return names;
-}
-
-std::pair<std::string, std::vector<std::string>> Shell::ask_user_command()
-{
-    while (true) {
-        auto line = ReadLineWrapper::readline(m_prompt);
-        auto parts = split_and_trim_line(line);
-        if (parts.empty()) {
-            continue;
-        }
-
-        ReadLineWrapper::add_history(line);
-        const auto name = parts[0];
-        const std::vector<std::string> args(parts.begin() + 1, parts.end());
-        return std::make_pair(name, args);
-    }
-}
-
-std::vector<std::string> Shell::split_and_trim_line(const std::string &line)
-{
-    auto parts = split_string(line, " ");
-
-    // remove spaces
-    for (auto &part : parts) {
-        part.erase(std::remove_if(part.begin(), part.end(), [](char c) {
-            return std::isspace(c);
-        }), part.end());
-    }
-
-    // Remove empty commands
-    parts.erase(std::remove_if(parts.begin(), parts.end(), [](const std::string &s) {
-        return s.empty();
-    }), parts.end());
-
-    return parts;
-}
-
-std::string Shell::get_help() const
-{
-    std::string result;
-    for (const auto &cmd : m_commands) {
-        auto full_name = fmt::format("{}({})", cmd->name(), cmd->short_name());
-        result += fmt::format("{:<30}{}\n", full_name, cmd->help());
-    }
-    return result;
-}
-
-ShellCommand *Shell::get_command_by_name(const std::string &name)
-{
-    for (const auto &cmd : m_commands) {
-        if ((name == cmd->name()) || (name == cmd->short_name())) {
-            return cmd.get();
-        }
-    }
-    return nullptr;
-}
-
-Shell::Help::Help(Shell &shell) :
-    ShellCommand("help", "h", "Show help on all commands"),
-    m_shell(shell)
-{}
-
-ShellResult Shell::Help::execute(const std::vector<std::string> &)
-{
-    return m_shell.get_help();
-}
-
-Shell::Quit::Quit(Shell &shell) :
-    ShellCommand("quit", "q", "Quit current shell"),
-    m_shell(shell)
-{}
-
-ShellResult Shell::Quit::execute(const std::vector<std::string> &)
-{
-    m_shell.m_should_quit = true;
-    return ShellResult("");
-}
-
-
-StartSubshellCommand::StartSubshellCommand(const std::string &name, const std::string &short_name,
-    const std::string &help) :
-    ShellCommand(name, short_name, help),
-    m_shell(std::make_shared<Shell>(fmt::format("({})> ", name)))
-{}
-
-ShellResult StartSubshellCommand::execute(const std::vector<std::string> &)
-{
-    m_shell->run_forever();
-    return ShellResult("");
-}
-
-std::shared_ptr<Shell> StartSubshellCommand::get_shell()
-{
-    return m_shell;
-}
diff --git a/hailort/tools/hw_debug/shell.hpp b/hailort/tools/hw_debug/shell.hpp
deleted file mode 100644
index a06e42a9..00000000
--- a/hailort/tools/hw_debug/shell.hpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- * @file shell.hpp
- * @brief Generic shell - contains commands and sub-shells. The shell implements
- *        a parse-execute command loop.
- */
-
-#ifndef _HW_DEBUG_SHELL_H_
-#define _HW_DEBUG_SHELL_H_
-
-#include <memory>
-#include <sstream>
-#include <string>
-#include <utility>
-#include <vector>
-
-// Result returned from each command. Currently wrapper to the output string.
-class ShellResult final {
-public:
-    ShellResult(const std::string &str) :
-        m_str(str)
-    {}
-
-    ShellResult(const std::vector<ShellResult> &results)
-    {
-        std::stringstream out;
-        for (const auto &result : results) {
-            result.print(out);
-        }
-        m_str = out.str();
-    }
-
-    void print(std::ostream &out) const
-    {
-        out << m_str;
-    }
-
-private:
-    std::string m_str;
-};
-
-// Base abstract class for some shell command.
-class ShellCommand {
-public:
-    virtual ~ShellCommand() = default;
-
-    ShellCommand(const std::string &name, const std::string &short_name,
-        const std::string &help);
-
-    std::string name() const { return m_name; }
-    std::string short_name() const { return m_short_name; }
-    std::string help() const { return m_help; }
-
-    virtual ShellResult execute(const std::vector<std::string> &args) = 0;
-private:
-    const std::string m_name;
-    const std::string m_short_name;
-    const std::string m_help;
-};
-
-class Shell final {
-public:
-    explicit Shell(const std::string &prompt);
-
-    Shell(const Shell &other) = delete;
-    Shell &operator=(const Shell &other) = delete;
-
-    void add_command(std::unique_ptr<ShellCommand> shell_command);
-    std::shared_ptr<Shell> add_subshell(const std::string &name, const std::string &short_name);
-    void run_forever();
-    std::vector<std::string> autocomplete(const std::string &text);
-
-private:
-
-    class Help : public ShellCommand {
-    public:
-        Help(Shell &shell);
-        ShellResult execute(const std::vector<std::string> &args) override;
-    private:
-        Shell &m_shell;
-    };
-
-    class Quit : public ShellCommand {
-    public:
-        Quit(Shell &shell);
-        ShellResult execute(const std::vector<std::string> &args) override;
-    private:
-        Shell &m_shell;
-    };
-
-    // pair of command name and its arguments.
-    std::pair<std::string, std::vector<std::string>> ask_user_command();
-    static std::vector<std::string> split_and_trim_line(const std::string &line);
-
-    std::string get_help() const;
-    // Gets a command or nullptr if it doesn't exist.
-    ShellCommand *get_command_by_name(const std::string &name);
-
-    const std::string m_prompt;
-    std::vector<std::unique_ptr<ShellCommand>> m_commands;
-    bool m_should_quit;
-};
-
-
-// This command starts a new subshell
-class StartSubshellCommand : public ShellCommand {
-public:
-    StartSubshellCommand(const std::string &name, const std::string &short_name,
-        const std::string &help);
-    ShellResult execute(const std::vector<std::string> &) override;
-
-    std::shared_ptr<Shell> get_shell();
-private:
-    std::shared_ptr<Shell> m_shell;
-};
-
-#endif /* _HW_DEBUG_SHELL_H_ */
\ No newline at end of file
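Reviewer note: for reference while reviewing the removal, this is a minimal sketch of how the deleted pieces composed - a MemorySource subclass exposes a memory region, the templated read/write commands and PrintCommand are registered on a Shell, and run_forever() drives the parse-execute loop (in the deleted main.cpp this role was played by create_pcie_accelerator_shell()/create_vpu_shell()). The DummyMemory class, its 4 KB backing buffer, and the prompt string are illustrative assumptions, not code from this patch.

    #include "shell.hpp"
    #include "memory_commands.hpp"

    #include <cstring>
    #include <vector>

    // Hypothetical MemorySource backed by a plain buffer; the real tool exposed
    // channel registers, descriptors, physical memory, etc.
    class DummyMemory : public MemorySource {
    public:
        DummyMemory() : m_data(0x1000, 0) {}

        hailo_status read(uint64_t offset, uint8_t *data, size_t size) override
        {
            // Bounds are already validated by the commands against total_size().
            std::memcpy(data, m_data.data() + offset, size);
            return HAILO_SUCCESS;
        }

        hailo_status write(uint64_t offset, const uint8_t *data, size_t size) override
        {
            std::memcpy(m_data.data() + offset, data, size);
            return HAILO_SUCCESS;
        }

        size_t total_size() const override { return m_data.size(); }

    private:
        std::vector<uint8_t> m_data;
    };

    int main()
    {
        ReadLineWrapper::init_library();

        auto memory = std::make_shared<DummyMemory>();
        Shell shell("(dummy)> ");
        shell.add_command(std::make_unique<MemoryReadCommand<uint32_t>>(memory));   // "read32" / "r32"
        shell.add_command(std::make_unique<MemoryWriteCommand<uint32_t>>(memory));  // "write32" / "w32"
        shell.add_command(std::make_unique<PrintCommand>(memory));                  // "print" / "p"
        shell.run_forever();  // "help"/"quit" are installed by the Shell itself
        return 0;
    }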